
Added arranged methods for Threshold, Distinct and Count #324

Open · wants to merge 9 commits into `master`
5 changes: 5 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,5 @@
{
"cSpell.words": [
"Hashable"
]
}
5 changes: 2 additions & 3 deletions src/algorithms/graphs/propagate.rs
@@ -53,8 +53,7 @@ use operators::arrange::arrangement::Arranged;
/// Propagates labels forward, retaining the minimum label.
///
/// This variant takes a pre-arranged edge collection, to facilitate re-use, and allows
-/// a method `logic` to specify the rounds in which we introduce various labels. The output
-/// of `logic should be a number in the interval [0,64],
+/// a method `logic` to specify the rounds in which we introduce various labels.
pub fn propagate_core<G, N, L, Tr, F, R>(edges: &Arranged<G,Tr>, nodes: &Collection<G,(N,L),R>, logic: F) -> Collection<G,(N,L),R>
where
G: Scope,
@@ -100,7 +99,7 @@ where
let labels =
proposals
.concat(&nodes)
-.reduce_abelian::<_,DefaultValTrace<_,_,_,_>>("Propagate", |_, s, t| t.push((s[0].0.clone(), R::from(1 as i8))));
+.reduce_abelian::<_,DefaultValTrace<_,_,_,_>>("Propagate", |_, s, t| t.push((s[0].0.clone(), R::from(1))));

let propagate: Collection<_, (N, L), R> =
labels
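The doc fix above removes a stale constraint (the `[0,64]` interval no longer applies to `logic`), and `R::from(1 as i8)` loses its redundant cast. For orientation, here is a hedged sketch of driving `propagate_core` with a pre-arranged edge set; the graph data and the label closure are invented for illustration, with bounds (`R: From<i8>`, `logic: Fn(&L) -> u64`) as the surrounding code suggests.

```rust
// Hypothetical usage sketch for `propagate_core` (not part of this PR).
use differential_dataflow::algorithms::graphs::propagate::propagate_core;
use differential_dataflow::input::Input;
use differential_dataflow::operators::arrange::ArrangeByKey;

fn main() {
    timely::example(|scope| {
        // A small edge set and a single seed (node, label) pair.
        let (_, edges) = scope.new_collection_from(vec![(1u32, 2u32), (2, 3)]);
        let (_, nodes) = scope.new_collection_from(vec![(1u32, 1u32)]);

        // Pre-arrange the edges so the arrangement can be shared across operators.
        let edges = edges.arrange_by_key();

        // `logic` names the round in which each label is introduced.
        propagate_core(&edges, &nodes, |label| *label as u64)
            .inspect(|x| println!("{:?}", x));
    });
}
```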
13 changes: 7 additions & 6 deletions src/consolidation.rs
@@ -55,8 +55,8 @@ pub fn consolidate_slice<T: Ord, R: Semigroup>(slice: &mut [(T, R)]) -> usize {
assert!(offset < index);

// LOOP INVARIANT: offset < index
-let ptr1 = slice.as_mut_ptr().offset(offset as isize);
-let ptr2 = slice.as_mut_ptr().offset(index as isize);
+let ptr1 = slice.as_mut_ptr().add(offset);
+let ptr2 = slice.as_mut_ptr().add(index);

if (*ptr1).0 == (*ptr2).0 {
(*ptr1).1.plus_equals(&(*ptr2).1);
@@ -65,7 +65,7 @@ pub fn consolidate_slice<T: Ord, R: Semigroup>(slice: &mut [(T, R)]) -> usize {
if !(*ptr1).1.is_zero() {
offset += 1;
}
-let ptr1 = slice.as_mut_ptr().offset(offset as isize);
+let ptr1 = slice.as_mut_ptr().add(offset);
std::mem::swap(&mut *ptr1, &mut *ptr2);
}
}
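An editorial note on the `offset` to `add` changes in this file: they are behavior-preserving, since for a `usize` index `n`, `ptr.add(n)` is defined as `ptr.offset(n as isize)` with identical safety requirements. A standalone sketch of the equivalence:

```rust
// `add` and `offset` agree for usize indices, which is all the
// consolidation code relies on; `add` just avoids the lossy-looking cast.
fn main() {
    let mut pairs = [("a", 1i64), ("a", 2i64), ("b", 3i64)];
    let base = pairs.as_mut_ptr();
    let index: usize = 2;
    unsafe {
        assert_eq!(base.add(index), base.offset(index as isize));
        assert_eq!((*base.add(index)).0, "b"); // both point at the third element
    }
}
```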
@@ -118,8 +118,9 @@ pub fn consolidate_updates_slice<D: Ord, T: Ord, R: Semigroup>(slice: &mut [(D,
unsafe {

// LOOP INVARIANT: offset < index
-let ptr1 = slice.as_mut_ptr().offset(offset as isize);
-let ptr2 = slice.as_mut_ptr().offset(index as isize);
+debug_assert!(offset < index);
+let ptr1 = slice.as_mut_ptr().add(offset);
+let ptr2 = slice.as_mut_ptr().add(index);

if (*ptr1).0 == (*ptr2).0 && (*ptr1).1 == (*ptr2).1 {
(*ptr1).2.plus_equals(&(*ptr2).2);
@@ -128,7 +129,7 @@ pub fn consolidate_updates_slice<D: Ord, T: Ord, R: Semigroup>(slice: &mut [(D,
if !(*ptr1).2.is_zero() {
offset += 1;
}
-let ptr1 = slice.as_mut_ptr().offset(offset as isize);
+let ptr1 = slice.as_mut_ptr().add(offset);
std::mem::swap(&mut *ptr1, &mut *ptr2);
}

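For context, a hedged usage sketch of `consolidate_slice` as the surrounding code implies (it sorts by the data, accumulates weights, and returns the length of the consolidated prefix); the string keys and `isize` weights here are illustrative:

```rust
use differential_dataflow::consolidation::consolidate_slice;

fn main() {
    let mut updates = vec![("apple", 1isize), ("banana", 2), ("apple", -1), ("banana", 1)];
    // Sort and accumulate; entries whose weights sum to zero are dropped.
    let len = consolidate_slice(&mut updates[..]);
    updates.truncate(len);
    assert_eq!(updates, vec![("banana", 3)]);
}
```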
15 changes: 13 additions & 2 deletions src/input.rs
@@ -247,7 +247,7 @@ impl<T: Timestamp+Clone, D: Data, R: Semigroup> InputSession<T, D, R> {
/// Adds to the weight of an element in the collection.
pub fn update(&mut self, element: D, change: R) {
if self.buffer.len() == self.buffer.capacity() {
-if self.buffer.len() > 0 {
+if !self.buffer.is_empty() {
self.handle.send_batch(&mut self.buffer);
}
// TODO : This is a fairly arbitrary choice; should probably use `Context::default_size()` or such.
@@ -260,7 +260,7 @@
pub fn update_at(&mut self, element: D, time: T, change: R) {
assert!(self.time.less_equal(&time));
if self.buffer.len() == self.buffer.capacity() {
-if self.buffer.len() > 0 {
+if !self.buffer.is_empty() {
self.handle.send_batch(&mut self.buffer);
}
// TODO : This is a fairly arbitrary choice; should probably use `Context::default_size()` or such.
@@ -301,6 +301,17 @@ impl<T: Timestamp+Clone, D: Data, R: Semigroup> InputSession<T, D, R> {
pub fn close(self) { }
}

impl<T, D, R> Default for InputSession<T, D, R>
where
T: Timestamp + Clone,
D: Data,
R: Semigroup,
{
fn default() -> Self {
Self::new()
}
}

impl<T: Timestamp+Clone, D: Data, R: Semigroup> Drop for InputSession<T, D, R> {
fn drop(&mut self) {
self.flush();
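The new `Default` impl simply defers to `InputSession::new`. A hedged sketch of the pattern it enables, building a detached session and attaching it with `to_collection` (the timestamp, data, and weight types are chosen for illustration):

```rust
use differential_dataflow::input::InputSession;

fn main() {
    timely::execute_directly(|worker| {
        // `Default` now works where a session must exist before the dataflow.
        let mut input: InputSession<u64, String, isize> = Default::default();

        worker.dataflow(|scope| {
            input.to_collection(scope)
                 .inspect(|x| println!("observed: {:?}", x));
        });

        input.insert("hello".to_owned());
        input.advance_to(1);
        // Dropping the session also flushes, per the `Drop` impl above.
        input.flush();
    });
}
```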
109 changes: 77 additions & 32 deletions src/operators/consolidate.rs
@@ -6,18 +6,27 @@
//! underlying system can more clearly see that no work must be done in the later case, and we can
//! drop out of, e.g. iterative computations.

-use timely::dataflow::Scope;
-
-use ::{Collection, ExchangeData, Hashable};
-use ::difference::Semigroup;
-use operators::arrange::arrangement::Arrange;
+use crate::{
+    difference::Semigroup,
+    lattice::Lattice,
+    operators::arrange::{Arrange, Arranged, TraceAgent},
+    trace::{implementations::ord::OrdKeySpine, Batch, Cursor, Trace, TraceReader},
+    AsCollection, Collection, ExchangeData, Hashable,
+};
+use timely::dataflow::{channels::pact::Pipeline, operators::Operator, Scope};

/// An extension method for consolidating weighted streams.
-pub trait Consolidate<D: ExchangeData+Hashable> : Sized {
+pub trait Consolidate<S, D, R>: Sized
+where
+    S: Scope,
+    S::Timestamp: Lattice,
+    D: ExchangeData + Hashable,
+    R: Semigroup,
+{
/// Aggregates the weights of equal records into at most one record.
///
-    /// This method uses the type `D`'s `hashed()` method to partition the data. The data are
-    /// accumulated in place, each held back until their timestamp has completed.
+    /// This method uses the type `D`'s [`hashed()`](Hashable) method to partition the data.
+    /// The data is accumulated in place and held back until its timestamp has completed.
///
/// # Examples
///
@@ -40,30 +49,72 @@ pub trait Consolidate<D: ExchangeData+Hashable> : Sized {
/// });
/// }
/// ```
-    fn consolidate(&self) -> Self {
+    fn consolidate(&self) -> Collection<S, D, R> {
self.consolidate_named("Consolidate")
}

-    /// As `consolidate` but with the ability to name the operator.
-    fn consolidate_named(&self, name: &str) -> Self;
+    /// As `consolidate`, but with the ability to name the operator.
+    fn consolidate_named(&self, name: &str) -> Collection<S, D, R> {
+        self.consolidate_core::<OrdKeySpine<_, _, _>>(name)
+            .as_collection(|data, &()| data.clone())
+    }

/// A `consolidate` that returns the intermediate [arrangement](Arranged).
///
/// # Example
///
/// ```rust
/// use differential_dataflow::{
/// input::Input,
/// operators::{Consolidate, JoinCore},
/// };
///
/// timely::example(|scope| {
/// let (_, collection) = scope.new_collection_from(0..10u32);
///
/// let keys = collection
/// .flat_map(|x| (0..x))
/// .concat(&collection.negate())
/// .consolidate_arranged();
///
/// collection
/// .map(|x| (x, x * 2))
/// .join_core(&keys, |&key, &value, &()| Some((key, value)))
/// .inspect(|x| println!("{:?}", x));
/// });
/// ```
fn consolidate_arranged(&self) -> Arranged<S, TraceAgent<OrdKeySpine<D, S::Timestamp, R>>> {
self.consolidate_core::<OrdKeySpine<_, _, _>>("Consolidate")
}

/// Aggregates the weights of equal records into at most one record,
/// returning the intermediate [arrangement](Arranged).
fn consolidate_core<Tr>(&self, name: &str) -> Arranged<S, TraceAgent<Tr>>
where
Tr: Trace + TraceReader<Key = D, Val = (), Time = S::Timestamp, R = R> + 'static,
Tr::Batch: Batch<Tr::Key, Tr::Val, Tr::Time, Tr::R>,
Tr::Cursor: Cursor<Tr::Key, Tr::Val, Tr::Time, Tr::R>;
}

-impl<G: Scope, D, R> Consolidate<D> for Collection<G, D, R>
+impl<S, D, R> Consolidate<S, D, R> for Collection<S, D, R>
where
-    D: ExchangeData+Hashable,
-    R: ExchangeData+Semigroup,
-    G::Timestamp: ::lattice::Lattice+Ord,
-{
-    fn consolidate_named(&self, name: &str) -> Self {
-        use trace::implementations::ord::OrdKeySpine as DefaultKeyTrace;
-        self.map(|k| (k, ()))
-            .arrange_named::<DefaultKeyTrace<_,_,_>>(name)
-            .as_collection(|d: &D, _| d.clone())
+    S: Scope,
+    S::Timestamp: Lattice + Ord,
+    D: ExchangeData + Hashable,
+    R: ExchangeData + Semigroup,
+{
+    fn consolidate_core<Tr>(&self, name: &str) -> Arranged<S, TraceAgent<Tr>>
+    where
+        Tr: Trace + TraceReader<Key = D, Val = (), Time = S::Timestamp, R = R> + 'static,
+        Tr::Batch: Batch<Tr::Key, Tr::Val, Tr::Time, Tr::R>,
+        Tr::Cursor: Cursor<Tr::Key, Tr::Val, Tr::Time, Tr::R>,
+    {
+        self.map(|key| (key, ())).arrange_named(name)
}
}

/// An extension method for consolidating weighted streams.
-pub trait ConsolidateStream<D: ExchangeData+Hashable> {
+pub trait ConsolidateStream<D: ExchangeData + Hashable> {
/// Aggregates the weights of equal records.
///
/// Unlike `consolidate`, this method does not exchange data and does not
@@ -98,19 +149,13 @@ pub trait ConsolidateStream<D: ExchangeData+Hashable> {

impl<G: Scope, D, R> ConsolidateStream<D> for Collection<G, D, R>
where
-    D: ExchangeData+Hashable,
-    R: ExchangeData+Semigroup,
-    G::Timestamp: ::lattice::Lattice+Ord,
-{
+    D: ExchangeData + Hashable,
+    R: ExchangeData + Semigroup,
+    G::Timestamp: Lattice + Ord,
+{
fn consolidate_stream(&self) -> Self {

-use timely::dataflow::channels::pact::Pipeline;
-use timely::dataflow::operators::Operator;
-use collection::AsCollection;

self.inner
.unary(Pipeline, "ConsolidateStream", |_cap, _info| {

let mut vector = Vec::new();
move |input, output| {
input.for_each(|time, data| {
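Finally, a hedged sketch of the new `consolidate_core` entry point with an explicit spine, mirroring what `consolidate_named` now does internally; the operator name and data are arbitrary:

```rust
use differential_dataflow::input::Input;
use differential_dataflow::operators::Consolidate;
use differential_dataflow::trace::implementations::ord::OrdKeySpine;

fn main() {
    timely::example(|scope| {
        let (_, collection) = scope.new_collection_from(0..10u32);

        // Consolidate into an arrangement once; downstream consumers can share it.
        let arranged = collection
            .concat(&collection)
            .consolidate_core::<OrdKeySpine<_, _, _>>("ConsolidateDemo");

        // Read the arrangement back out as a collection of keys.
        arranged
            .as_collection(|key, &()| *key)
            .inspect(|x| println!("{:?}", x));
    });
}
```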