Skip to content

Commit aa54a7f

Browse files
committed
simple filling example is working
1 parent b0aa776 commit aa54a7f

File tree

17 files changed

+281
-3
lines changed

17 files changed

+281
-3
lines changed

histogrammar/defs.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,10 @@ def __add__(self, other):
163163
"""Add two containers of the same type. The originals are unaffected."""
164164
raise NotImplementedError
165165

166+
def __iadd__(self, other):
167+
"""Add other to self; other is unaffected, but self is changed in place."""
168+
raise NotImplementedError
169+
166170
def __mul__(self, factor):
167171
"""Reweight the contents in all nested aggregators by a scalar factor, as though they had been filled with a different weight. The original is unaffected."""
168172
raise NotImplementedError
@@ -1090,8 +1094,12 @@ def _makeNPWeights(self, weights, shape):
10901094
else:
10911095
return weights * numpy.ones(shape, dtype=numpy.float64)
10921096

1093-
def fillsparksql(self, data):
1094-
pass
1097+
def fillsparksql(self, df):
1098+
converter = df._sc._jvm.org.dianahep.histogrammar.sparksql.pyspark.AggregatorConverter()
1099+
agg = self._sparksql(df._sc._jvm, converter)
1100+
result = converter.histogrammar(df._jdf, agg)
1101+
delta = Factory.fromJson(jsonlib.loads(result.toJsonString()))
1102+
self += delta
10951103

10961104
# useful functions
10971105

histogrammar/primitives/average.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,13 @@ def __add__(self, other):
8787
else:
8888
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
8989

90+
@inheritdoc(Container)
91+
def __iadd__(self, other):
92+
both = self + other
93+
self.entries = both.entries
94+
self.mean = both.mean
95+
return self
96+
9097
@inheritdoc(Container)
9198
def __mul__(self, factor):
9299
if math.isnan(factor) or factor <= 0.0:
@@ -271,6 +278,9 @@ def _numpy(self, data, weights, shape):
271278
mb = numpy.average(q, weights=weights)
272279
self.mean = float((ca*ma + (ca_plus_cb - ca)*mb) / ca_plus_cb)
273280

281+
def _sparksql(self, jvm, converter):
282+
return converter.Average(self.quantity.asSparkSQL())
283+
274284
@property
275285
def children(self):
276286
"""List of sub-aggregators, to make it possible to walk the tree."""

histogrammar/primitives/bag.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,13 @@ def __add__(self, other):
102102
else:
103103
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
104104

105+
@inheritdoc(Container)
106+
def __iadd__(self, other):
107+
both = self + other
108+
self.entries = other.entries
109+
self.values = other.values
110+
return self
111+
105112
@inheritdoc(Container)
106113
def __mul__(self, factor):
107114
if math.isnan(factor) or factor <= 0.0:
@@ -231,6 +238,9 @@ def _numpy(self, data, weights, shape):
231238
if isinstance(x, numpy.ndarray):
232239
x = x.tolist()
233240
self._update(x, float(w))
241+
242+
def _sparksql(self, jvm, converter):
243+
return converter.Bag(self.quantity.asSparkSQL(), range)
234244

235245
@property
236246
def children(self):

histogrammar/primitives/bin.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,27 @@ def __add__(self, other):
173173
else:
174174
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
175175

176+
@inheritdoc(Container)
177+
def __iadd__(self, other):
178+
if isinstance(other, Bin):
179+
if self.low != other.low:
180+
raise ContainerException("cannot add Bins because low differs ({0} vs {1})".format(self.low, other.low))
181+
if self.high != other.high:
182+
raise ContainerException("cannot add Bins because high differs ({0} vs {1})".format(self.high, other.high))
183+
if len(self.values) != len(other.values):
184+
raise ContainerException("cannot add Bins because nubmer of values differs ({0} vs {1})".format(len(self.values), len(other.values)))
185+
if len(self.values) == 0:
186+
raise ContainerException("cannot add Bins because number of values is zero")
187+
self.entries += other.entries
188+
for x, y in zip(self.values, other.values):
189+
x += y
190+
self.underflow += other.underflow
191+
self.overflow += other.overflow
192+
self.nanflow += other.nanflow
193+
return self
194+
else:
195+
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
196+
176197
@inheritdoc(Container)
177198
def __mul__(self, factor):
178199
if math.isnan(factor) or factor <= 0.0:
@@ -448,6 +469,9 @@ def _numpy(self, data, weights, shape):
448469
# no possibility of exception from here on out (for rollback)
449470
self.entries += float(newentries)
450471

472+
def _sparksql(self, jvm, converter):
473+
return converter.Bin(len(self.values), self.low, self.high, self.quantity.asSparkSQL(), self.values[0]._sparksql(jvm, converter), self.underflow._sparksql(jvm, converter), self.overflow._sparksql(jvm, converter), self.nanflow._sparksql(jvm, converter))
474+
451475
@property
452476
def children(self):
453477
"""List of sub-aggregators, to make it possible to walk the tree."""

histogrammar/primitives/categorize.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,19 @@ def __add__(self, other):
142142
else:
143143
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
144144

145+
@inheritdoc(Container)
146+
def __iadd__(self, other):
147+
if isinstance(other, Categorize):
148+
self.entries += other.entries
149+
for k in self.keySet.union(other.keySet):
150+
if k in self.bins and k in other.bins:
151+
bins[k] += other.bins[k]
152+
elif k not in self.bins and k in other.bins:
153+
bins[k] = self.bins[k].copy()
154+
return self
155+
else:
156+
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
157+
145158
@inheritdoc(Container)
146159
def __mul__(self, factor):
147160
if math.isnan(factor) or factor <= 0.0:
@@ -235,6 +248,9 @@ def _numpy(self, data, weights, shape):
235248
# no possibility of exception from here on out (for rollback)
236249
self.entries += float(weights.sum())
237250

251+
def _sparksql(self, jvm, converter):
252+
return converter.Categorize(self.quantity.asSparkSQL(), self.value._sparksql(jvm, converter))
253+
238254
@property
239255
def children(self):
240256
"""List of sub-aggregators, to make it possible to walk the tree."""

histogrammar/primitives/centrallybin.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,16 @@ def __add__(self, other):
172172
out.bins = newbins
173173
return out.specialize()
174174

175+
@inheritdoc(Container)
176+
def __iadd__(self, other):
177+
if self.centers != other.centers:
178+
raise ContainerException("cannot add CentrallyBin because centers are different:\n {0}\nvs\n {1}".format(self.centers, other.centers))
179+
self.entries += other.entries
180+
for (c1, v1), (_, v2) in zip(self.bins, other.bins):
181+
v1 += v2
182+
self.nanflow += other.nanflow
183+
return self
184+
175185
@inheritdoc(Container)
176186
def __mul__(self, factor):
177187
if math.isnan(factor) or factor <= 0.0:
@@ -397,6 +407,9 @@ def _numpy(self, data, weights, shape):
397407
# no possibility of exception from here on out (for rollback)
398408
self.entries += float(newentries)
399409

410+
def _sparksql(self, jvm, converter):
411+
return converter.CentrallyBin([c for c, v in self.bins], self.quantity.asSparkSQL(), self.bins[0][1]._sparksql(jvm, converter), self.nanflow._sparksql(jvm, converter))
412+
400413
@property
401414
def children(self):
402415
"""List of sub-aggregators, to make it possible to walk the tree."""

histogrammar/primitives/collection.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,19 @@ def __add__(self, other):
319319
else:
320320
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
321321

322+
@inheritdoc(Container)
323+
def __iadd__(self, other):
324+
if isinstance(other, Label):
325+
if self.keySet != other.keySet:
326+
raise ContainerException("cannot add Labels because keys differ:\n {0}\n {1}".format(", ".join(sorted(self.keys)), ", ".join(sorted(other.keys))))
327+
self.entries += other.entries
328+
for k in self.keys:
329+
v = self(k)
330+
v += other(k)
331+
return self
332+
else:
333+
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
334+
322335
@inheritdoc(Container)
323336
def __mul__(self, factor):
324337
if math.isnan(factor) or factor <= 0.0:
@@ -360,6 +373,9 @@ def _numpy(self, data, weights, shape):
360373
else:
361374
self.entries += float(weights * shape[0])
362375

376+
def _sparksql(self, jvm, converter):
377+
return converter.Label([jvm.scala.Tuple2(k, v._sparksql(jvm, converter)) for k, v in self.pairs.items()])
378+
363379
@property
364380
def children(self):
365381
"""List of sub-aggregators, to make it possible to walk the tree."""
@@ -517,6 +533,19 @@ def __add__(self, other):
517533
else:
518534
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
519535

536+
@inheritdoc(Container)
537+
def __iadd__(self, other):
538+
if isinstance(other, UntypedLabel):
539+
if self.keySet != other.keySet:
540+
raise ContainerException("cannot add UntypedLabels because keys differ:\n {0}\n {1}".format(", ".join(sorted(self.keys)), ", ".join(sorted(other.keys))))
541+
self.entries += other.entries
542+
for k in self.keys:
543+
v = self(k)
544+
v += other(k)
545+
return self
546+
else:
547+
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
548+
520549
@inheritdoc(Container)
521550
def __mul__(self, factor):
522551
if math.isnan(factor) or factor <= 0.0:
@@ -558,6 +587,9 @@ def _numpy(self, data, weights, shape):
558587
else:
559588
self.entries += float(weights * shape[0])
560589

590+
def _sparksql(self, jvm, converter):
591+
return converter.UntypedLabel([jvm.scala.Tuple2(k, v._sparksql(jvm, converter)) for k, v in self.pairs.items()])
592+
561593
@property
562594
def children(self):
563595
"""List of sub-aggregators, to make it possible to walk the tree."""
@@ -723,6 +755,18 @@ def __add__(self, other):
723755
else:
724756
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
725757

758+
@inheritdoc(Container)
759+
def __iadd__(self, other):
760+
if isinstance(other, Index):
761+
if self.size != other.size:
762+
raise ContainerException("cannot add Indexes because they have different sizes: ({0} vs {1})".format(self.size, other.size))
763+
self.entries += other.entries
764+
for x, y in zip(self.values, other.values):
765+
x += y
766+
return self
767+
else:
768+
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
769+
726770
@inheritdoc(Container)
727771
def __mul__(self, factor):
728772
if math.isnan(factor) or factor <= 0.0:
@@ -762,6 +806,9 @@ def _numpy(self, data, weights, shape):
762806
else:
763807
self.entries += float(weights * shape[0])
764808

809+
def _sparksql(self, jvm, converter):
810+
return converter.Index([v._sparksql(jvm, converter) for v in self.values])
811+
765812
@property
766813
def children(self):
767814
"""List of sub-aggregators, to make it possible to walk the tree."""
@@ -934,6 +981,18 @@ def __add__(self, other):
934981
else:
935982
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
936983

984+
@inheritdoc(Container)
985+
def __iadd__(self, other):
986+
if isinstance(other, Branch):
987+
if self.size != other.size:
988+
raise ContainerException("cannot add Branches because they have different sizes: ({0} vs {1})".format(self.size, other.size))
989+
self.entries += other.entries
990+
for x, y in zip(self.values, other.values):
991+
x += y
992+
return self
993+
else:
994+
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
995+
937996
@inheritdoc(Container)
938997
def __mul__(self, factor):
939998
if math.isnan(factor) or factor <= 0.0:
@@ -973,6 +1032,9 @@ def _numpy(self, data, weights, shape):
9731032
else:
9741033
self.entries += float(weights * shape[0])
9751034

1035+
def _sparksql(self, jvm, converter):
1036+
return converter.Branch(*[v._sparksql(jvm, converter) for v in self.values])
1037+
9761038
@property
9771039
def children(self):
9781040
"""List of sub-aggregators, to make it possible to walk the tree."""

histogrammar/primitives/count.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,14 @@ def __add__(self, other):
8383
else:
8484
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
8585

86+
@inheritdoc(Container)
87+
def __iadd__(self, other):
88+
if isinstance(other, Count):
89+
self.entries += other.entries
90+
return self
91+
else:
92+
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
93+
8694
@inheritdoc(Container)
8795
def __mul__(self, factor):
8896
if self.transform != identity or \
@@ -183,6 +191,9 @@ def _numpy(self, data, weights, shape):
183191
else:
184192
raise ValueError("cannot use Numpy to fill an isolated Count (unless the weights are given as an array)")
185193

194+
def _sparksql(self, jvm, converter):
195+
return converter.Count() # TODO: handle transform
196+
186197
@property
187198
def children(self):
188199
"""List of sub-aggregators, to make it possible to walk the tree."""

histogrammar/primitives/deviate.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,14 @@ def __add__(self, other):
109109
else:
110110
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
111111

112+
@inheritdoc(Container)
113+
def __iadd__(self, other):
114+
both = self + other
115+
self.entries = both.entries
116+
self.mean = both.mean
117+
self.varianceTimesEntries = both.varianceTimesEntries
118+
return self
119+
112120
@inheritdoc(Container)
113121
def __mul__(self, factor):
114122
if math.isnan(factor) or factor <= 0.0:
@@ -331,6 +339,9 @@ def _numpy(self, data, weights, shape):
331339
self.mean = float((ca*ma + (ca_plus_cb - ca)*mb) / ca_plus_cb)
332340
self.varianceTimesEntries = float(sa + sb + ca*ma*ma + cb*mb*mb - 2.0*self.mean*(ca*ma + cb*mb) + self.mean*self.mean*ca_plus_cb)
333341

342+
def _sparksql(self, jvm, converter):
343+
return converter.Deviate(quantity.asSparkSQL())
344+
334345
@property
335346
def children(self):
336347
"""List of sub-aggregators, to make it possible to walk the tree."""

histogrammar/primitives/fraction.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,16 @@ def __add__(self, other):
119119
else:
120120
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
121121

122+
@inheritdoc(Container)
123+
def __iadd__(self, other):
124+
if isinstance(other, Fraction):
125+
self.entries += other.entries
126+
self.numerator += other.numerator
127+
self.denominator += other.denominator
128+
return self
129+
else:
130+
raise ContainerException("cannot add {0} and {1}".format(self.name, other.name))
131+
122132
@inheritdoc(Container)
123133
def __mul__(self, factor):
124134
if math.isnan(factor) or factor <= 0.0:
@@ -249,6 +259,9 @@ def _numpy(self, data, weights, shape):
249259
# no possibility of exception from here on out (for rollback)
250260
self.entries += float(weights.sum())
251261

262+
def _sparksql(self, jvm, converter):
263+
return converter.Fraction(quantity.asSparkSQL(), self.numerator._sparksql(jvm, converter))
264+
252265
@property
253266
def children(self):
254267
"""List of sub-aggregators, to make it possible to walk the tree."""

0 commit comments

Comments
 (0)