histogrammar
diff --git a/‎histogrammar/defs.py‎
Lines changed: 10 additions & 2 deletions b/‎histogrammar/defs.py‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎histogrammar/primitives/average.py‎
Lines changed: 10 additions & 0 deletions b/‎histogrammar/primitives/average.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎histogrammar/primitives/bag.py‎
Lines changed: 10 additions & 0 deletions b/‎histogrammar/primitives/bag.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎histogrammar/primitives/bin.py‎
Lines changed: 24 additions & 0 deletions b/‎histogrammar/primitives/bin.py‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎histogrammar/primitives/categorize.py‎
Lines changed: 16 additions & 0 deletions b/‎histogrammar/primitives/categorize.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎histogrammar/primitives/centrallybin.py‎
Lines changed: 13 additions & 0 deletions b/‎histogrammar/primitives/centrallybin.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎histogrammar/primitives/collection.py‎
Lines changed: 62 additions & 0 deletions b/‎histogrammar/primitives/collection.py‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎histogrammar/primitives/count.py‎
Lines changed: 11 additions & 0 deletions b/‎histogrammar/primitives/count.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎histogrammar/primitives/deviate.py‎
Lines changed: 11 additions & 0 deletions b/‎histogrammar/primitives/deviate.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎histogrammar/primitives/fraction.py‎
Lines changed: 13 additions & 0 deletions b/‎histogrammar/primitives/fraction.py‎
Lines changed: 13 additions & 0 deletions
@@ -163,6 +163,10 @@ def __add__(self, other):
         """Add two containers of the same type. The originals are unaffected."""
         raise NotImplementedError
 
+    def __iadd__(self, other):
+        """Add other to self; other is unaffected, but self is changed in place."""
+        raise NotImplementedError  # base placeholder only; concrete aggregators supply the in-place merge
+
     def __mul__(self, factor):
         """Reweight the contents in all nested aggregators by a scalar factor, as though they had been filled with a different weight. The original is unaffected."""
         raise NotImplementedError
@@ -1090,8 +1094,12 @@ def _makeNPWeights(self, weights, shape):
         else:
             return weights * numpy.ones(shape, dtype=numpy.float64)
 
-    def fillsparksql(self, data):
-        pass
+    def fillsparksql(self, df):
+        converter = df._sc._jvm.org.dianahep.histogrammar.sparksql.pyspark.AggregatorConverter()
+        agg = self._sparksql(df._sc._jvm, converter)
+        result = converter.histogrammar(df._jdf, agg)
+        delta = Factory.fromJson(jsonlib.loads(result.toJsonString()))
+        self += delta
 
 # useful functions
 
 
@@ -87,6 +87,13 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        both = self + other
+        self.entries = both.entries
+        self.mean = both.mean
+        return self
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -271,6 +278,9 @@ def _numpy(self, data, weights, shape):
             mb = numpy.average(q, weights=weights)
             self.mean = float((ca*ma + (ca_plus_cb - ca)*mb) / ca_plus_cb)
 
+    def _sparksql(self, jvm, converter):  # jvm is unused here; every _sparksql is called with the same (jvm, converter) pair
+        return converter.Average(self.quantity.asSparkSQL())
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""
 
@@ -102,6 +102,13 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        both = self + other
+        self.entries = other.entries
+        self.values = other.values
+        return self
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -231,6 +238,9 @@ def _numpy(self, data, weights, shape):
                 if isinstance(x, numpy.ndarray):
                     x = x.tolist()
                 self._update(x, float(w))
+
+    def _sparksql(self, jvm, converter):
+        return converter.Bag(self.quantity.asSparkSQL(), range)
 
     @property
     def children(self):
 
@@ -173,6 +173,27 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        if isinstance(other, Bin):
+            if self.low != other.low:
+                raise ContainerException("cannot add Bins because low differs ({0} vs {1})".format(self.low, other.low))
+            if self.high != other.high:
+                raise ContainerException("cannot add Bins because high differs ({0} vs {1})".format(self.high, other.high))
+            if len(self.values) != len(other.values):
+                raise ContainerException("cannot add Bins because nubmer of values differs ({0} vs {1})".format(len(self.values), len(other.values)))
+            if len(self.values) == 0:
+                raise ContainerException("cannot add Bins because number of values is zero")
+            self.entries += other.entries
+            for x, y in zip(self.values, other.values):
+                x += y
+            self.underflow += other.underflow
+            self.overflow += other.overflow
+            self.nanflow += other.nanflow
+            return self
+        else:
+            raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -448,6 +469,9 @@ def _numpy(self, data, weights, shape):
         # no possibility of exception from here on out (for rollback)
         self.entries += float(newentries)
 
+    def _sparksql(self, jvm, converter):  # converts sub-aggregators recursively; of the bins, only values[0] is passed along
+        return converter.Bin(len(self.values), self.low, self.high, self.quantity.asSparkSQL(), self.values[0]._sparksql(jvm, converter), self.underflow._sparksql(jvm, converter), self.overflow._sparksql(jvm, converter), self.nanflow._sparksql(jvm, converter))
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""
 
@@ -142,6 +142,19 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        if isinstance(other, Categorize):
+            self.entries += other.entries
+            for k in self.keySet.union(other.keySet):
+                if k in self.bins and k in other.bins:
+                    bins[k] += other.bins[k]
+                elif k not in self.bins and k in other.bins:
+                    bins[k] = self.bins[k].copy()
+            return self
+        else:
+            raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -235,6 +248,9 @@ def _numpy(self, data, weights, shape):
         # no possibility of exception from here on out (for rollback)
         self.entries += float(weights.sum())
 
+    def _sparksql(self, jvm, converter):  # jvm is only forwarded to self.value's own _sparksql
+        return converter.Categorize(self.quantity.asSparkSQL(), self.value._sparksql(jvm, converter))
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""
 
@@ -172,6 +172,16 @@ def __add__(self, other):
         out.bins = newbins
         return out.specialize()
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        if self.centers != other.centers:
+            raise ContainerException("cannot add CentrallyBin because centers are different:\n    {0}\nvs\n    {1}".format(self.centers, other.centers))
+        self.entries += other.entries
+        for (c1, v1), (_, v2) in zip(self.bins, other.bins):
+            v1 += v2
+        self.nanflow += other.nanflow
+        return self
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -397,6 +407,9 @@ def _numpy(self, data, weights, shape):
         # no possibility of exception from here on out (for rollback)
         self.entries += float(newentries)
 
+    def _sparksql(self, jvm, converter):  # passes the bin centers plus the converted forms of bins[0]'s value and nanflow
+        return converter.CentrallyBin([c for c, v in self.bins], self.quantity.asSparkSQL(), self.bins[0][1]._sparksql(jvm, converter), self.nanflow._sparksql(jvm, converter))
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""
 
@@ -319,6 +319,19 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        if isinstance(other, Label):
+            if self.keySet != other.keySet:
+                raise ContainerException("cannot add Labels because keys differ:\n    {0}\n    {1}".format(", ".join(sorted(self.keys)), ", ".join(sorted(other.keys))))
+            self.entries += other.entries
+            for k in self.keys:
+                v = self(k)
+                v += other(k)
+            return self
+        else:
+            raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -360,6 +373,9 @@ def _numpy(self, data, weights, shape):
         else:
             self.entries += float(weights * shape[0])
 
+    def _sparksql(self, jvm, converter):  # each key is paired with its sub-aggregator's converted form in a scala Tuple2
+        return converter.Label([jvm.scala.Tuple2(k, v._sparksql(jvm, converter)) for k, v in self.pairs.items()])
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""
@@ -517,6 +533,19 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        if isinstance(other, UntypedLabel):
+            if self.keySet != other.keySet:
+                raise ContainerException("cannot add UntypedLabels because keys differ:\n    {0}\n    {1}".format(", ".join(sorted(self.keys)), ", ".join(sorted(other.keys))))
+            self.entries += other.entries
+            for k in self.keys:
+                v = self(k)
+                v += other(k)
+            return self
+        else:
+            raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -558,6 +587,9 @@ def _numpy(self, data, weights, shape):
         else:
             self.entries += float(weights * shape[0])
 
+    def _sparksql(self, jvm, converter):  # each key is paired with its sub-aggregator's converted form in a scala Tuple2
+        return converter.UntypedLabel([jvm.scala.Tuple2(k, v._sparksql(jvm, converter)) for k, v in self.pairs.items()])
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""
@@ -723,6 +755,18 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        if isinstance(other, Index):
+            if self.size != other.size:
+                raise ContainerException("cannot add Indexes because they have different sizes: ({0} vs {1})".format(self.size, other.size))
+            self.entries += other.entries
+            for x, y in zip(self.values, other.values):
+                x += y
+            return self
+        else:
+            raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -762,6 +806,9 @@ def _numpy(self, data, weights, shape):
         else:
             self.entries += float(weights * shape[0])
 
+    def _sparksql(self, jvm, converter):  # converts each of self.values in order and passes them as a single list
+        return converter.Index([v._sparksql(jvm, converter) for v in self.values])
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""
@@ -934,6 +981,18 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        if isinstance(other, Branch):
+            if self.size != other.size:
+                raise ContainerException("cannot add Branches because they have different sizes: ({0} vs {1})".format(self.size, other.size))
+            self.entries += other.entries
+            for x, y in zip(self.values, other.values):
+                x += y
+            return self
+        else:
+            raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -973,6 +1032,9 @@ def _numpy(self, data, weights, shape):
         else:
             self.entries += float(weights * shape[0])
 
+    def _sparksql(self, jvm, converter):  # the converted values are unpacked into separate positional arguments, not passed as one list
+        return converter.Branch(*[v._sparksql(jvm, converter) for v in self.values])
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""
 
@@ -83,6 +83,14 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        if isinstance(other, Count):
+            self.entries += other.entries
+            return self
+        else:
+            raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if self.transform != identity or \
@@ -183,6 +191,9 @@ def _numpy(self, data, weights, shape):
         else:
             raise ValueError("cannot use Numpy to fill an isolated Count (unless the weights are given as an array)")
 
+    def _sparksql(self, jvm, converter):  # neither jvm nor any state of self is used yet
+        return converter.Count()   # TODO: handle transform (nothing about the transform is passed to the converter yet)
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""
 
@@ -109,6 +109,14 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        both = self + other
+        self.entries = both.entries
+        self.mean = both.mean
+        self.varianceTimesEntries = both.varianceTimesEntries
+        return self
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -331,6 +339,9 @@ def _numpy(self, data, weights, shape):
             self.mean = float((ca*ma + (ca_plus_cb - ca)*mb) / ca_plus_cb)
             self.varianceTimesEntries = float(sa + sb + ca*ma*ma + cb*mb*mb - 2.0*self.mean*(ca*ma + cb*mb) + self.mean*self.mean*ca_plus_cb)
 
+    def _sparksql(self, jvm, converter):
+        return converter.Deviate(quantity.asSparkSQL())
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""
 
@@ -119,6 +119,16 @@ def __add__(self, other):
         else:
             raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
 
+    @inheritdoc(Container)
+    def __iadd__(self, other):
+        if isinstance(other, Fraction):
+            self.entries += other.entries
+            self.numerator += other.numerator
+            self.denominator += other.denominator
+            return self
+        else:
+            raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
+
     @inheritdoc(Container)
     def __mul__(self, factor):
         if math.isnan(factor) or factor <= 0.0:
@@ -249,6 +259,9 @@ def _numpy(self, data, weights, shape):
         # no possibility of exception from here on out (for rollback)
         self.entries += float(weights.sum())
 
+    def _sparksql(self, jvm, converter):
+        return converter.Fraction(quantity.asSparkSQL(), self.numerator._sparksql(jvm, converter))
+
     @property
     def children(self):
         """List of sub-aggregators, to make it possible to walk the tree."""