Skip to content

Commit d14a4fa

Browse files
committed
Nice interface; works for simple histograms.
1 parent 25a7a35 commit d14a4fa

File tree

1 file changed

+81
-32
lines changed

1 file changed

+81
-32
lines changed

histogrammar/sparksql.py

Lines changed: 81 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17+
import json
18+
import types
19+
1720
import histogrammar.primitives.average
1821
import histogrammar.primitives.bag
1922
import histogrammar.primitives.bin
@@ -29,44 +32,90 @@
2932
import histogrammar.primitives.sparselybin
3033
import histogrammar.primitives.stack
3134
import histogrammar.primitives.sum
35+
from histogrammar.defs import Factory
3236

3337
def addMethods(df):
34-
def histogrammar(self, h):
35-
converter = self.df._sc._jvm.org.dianahep.histogrammar.sparksql.pyspark.AggregatorConverter()
36-
agg = h._sparksql(self.df._sc._jvm, converter)
37-
result = converter.histogrammar(self.df._jdf, agg)
38-
return Factory.fromJson(jsonlib.loads(result.toJsonString()))
38+
def hg(self, h):
    """Run the aggregator ``h`` over this DataFrame through the JVM bridge.

    Steps, in order:
      1. instantiate the Scala-side ``AggregatorConverter`` via ``self._sc._jvm``;
      2. ask ``h`` for its JVM counterpart with ``h._sparksql(jvm, converter)``;
      3. let the converter evaluate it against ``self._jdf``
         (presumably the py4j handle of the underlying Java DataFrame -- confirm);
      4. parse the result's JSON string and rebuild a Python object with
         ``Factory.fromJson``.

    Returns whatever ``Factory.fromJson`` produces for the parsed JSON.
    """
    jvm = self._sc._jvm
    converter = jvm.org.dianahep.histogrammar.sparksql.pyspark.AggregatorConverter()
    jvm_aggregator = h._sparksql(jvm, converter)
    filled = converter.histogrammar(self._jdf, jvm_aggregator)
    as_json = filled.toJsonString()
    return Factory.fromJson(json.loads(as_json))
3943

4044
def Average(self, quantity):
    """Build an ``Average`` primitive from ``quantity`` and run it.

    The primitive is handed to ``self.histogrammar``; that call's result
    is returned unchanged.
    """
    aggregator = histogrammar.primitives.average.Average(quantity)
    return self.histogrammar(aggregator)
4246

4347
def Bag(self, quantity, range):
    """Build a ``Bag`` primitive from ``quantity`` and ``range`` and run it.

    Both arguments are forwarded positionally to
    ``histogrammar.primitives.bag.Bag``; the result of
    ``self.histogrammar`` is returned unchanged.
    """
    # NOTE: ``range`` shadows the builtin, but it is part of the public
    # signature and is therefore kept as is.
    aggregator = histogrammar.primitives.bag.Bag(quantity, range)
    return self.histogrammar(aggregator)
4549

46-
# def Bin(self, num, low, high, quantity, value=histogrammar.primitives.count.Count(), underflow=histogrammar.primitives.count.Count(), overflow=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count()):
47-
# return self.histogrammar()
48-
49-
50-
51-
52-
# df.histogrammar = types.MethodType(histogrammar, df)
53-
54-
# hg.Average = types.MethodType(Average , df)
55-
# hg.Bag = types.MethodType(Bag , df)
56-
# hg.Bin = types.MethodType(Bin , df)
57-
# hg.Categorize = types.MethodType(Categorize , df)
58-
# hg.CentrallyBin = types.MethodType(CentrallyBin , df)
59-
# hg.Label = types.MethodType(Label , df)
60-
# hg.UntypedLabel = types.MethodType(UntypedLabel , df)
61-
# hg.Index = types.MethodType(Index , df)
62-
# hg.Branch = types.MethodType(Branch , df)
63-
# hg.Count = types.MethodType(Count , df)
64-
# hg.Deviate = types.MethodType(Deviate , df)
65-
# hg.Fraction = types.MethodType(Fraction , df)
66-
# hg.IrregularlyBin = types.MethodType(IrregularlyBin , df)
67-
# hg.Minimize = types.MethodType(Minimize , df)
68-
# hg.Maximize = types.MethodType(Maximize , df)
69-
# hg.Select = types.MethodType(Select , df)
70-
# hg.SparselyBin = types.MethodType(SparselyBin , df)
71-
# hg.Stack = types.MethodType(Stack , df)
72-
# hg.Sum = types.MethodType(Sum , df)
50+
def Bin(self, num, low, high, quantity, value=histogrammar.primitives.count.Count(), underflow=histogrammar.primitives.count.Count(), overflow=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count()):
    """Build a ``Bin`` primitive and run it on this DataFrame.

    All eight arguments are forwarded positionally, in signature order, to
    ``histogrammar.primitives.bin.Bin``. The sub-aggregator parameters
    (``value``, ``underflow``, ``overflow``, ``nanflow``) each default to a
    ``Count()`` created when this function is defined.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.bin.Bin(
        num, low, high, quantity, value, underflow, overflow, nanflow)
    return self.histogrammar(aggregator)
52+
53+
def Categorize(self, quantity, value=histogrammar.primitives.count.Count()):
    """Build a ``Categorize`` primitive and run it on this DataFrame.

    ``quantity`` and ``value`` are forwarded positionally; ``value``
    defaults to a ``Count()`` created when this function is defined.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.categorize.Categorize(quantity, value)
    return self.histogrammar(aggregator)
55+
56+
def CentrallyBin(self, bins, quantity, value=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count()):
    """Build a ``CentrallyBin`` primitive and run it on this DataFrame.

    ``bins``, ``quantity``, ``value`` and ``nanflow`` are forwarded
    positionally; the last two default to ``Count()`` instances created
    when this function is defined.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.centrallybin.CentrallyBin(
        bins, quantity, value, nanflow)
    return self.histogrammar(aggregator)
58+
59+
def Label(self, **pairs):
    """Build a ``Label`` collection from keyword ``pairs`` and run it.

    The keyword arguments are passed through untouched to
    ``histogrammar.primitives.collection.Label``.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.collection.Label(**pairs)
    return self.histogrammar(aggregator)
61+
62+
def UntypedLabel(self, **pairs):
    """Build an ``UntypedLabel`` collection from keyword ``pairs`` and run it.

    The keyword arguments are passed through untouched to
    ``histogrammar.primitives.collection.UntypedLabel``.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.collection.UntypedLabel(**pairs)
    return self.histogrammar(aggregator)
64+
65+
def Index(self, *values):
    """Build an ``Index`` collection from positional ``values`` and run it.

    The positional arguments are passed through untouched to
    ``histogrammar.primitives.collection.Index``.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.collection.Index(*values)
    return self.histogrammar(aggregator)
67+
68+
def Branch(self, *values):
    """Build a ``Branch`` collection from positional ``values`` and run it.

    The positional arguments are passed through untouched to
    ``histogrammar.primitives.collection.Branch``.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.collection.Branch(*values)
    return self.histogrammar(aggregator)
70+
71+
def Count(self):  # TODO: handle transform
    """Build a default ``Count`` primitive and run it on this DataFrame.

    No arguments are forwarded: the primitive is always constructed as
    ``Count()``.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.count.Count()
    return self.histogrammar(aggregator)
73+
74+
def Deviate(self, quantity):
    """Build a ``Deviate`` primitive from ``quantity`` and run it.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.deviate.Deviate(quantity)
    return self.histogrammar(aggregator)
76+
77+
def Fraction(self, quantity, value=histogrammar.primitives.count.Count()):
    """Build a ``Fraction`` primitive and run it on this DataFrame.

    ``quantity`` and ``value`` are forwarded positionally; ``value``
    defaults to a ``Count()`` created when this function is defined.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.fraction.Fraction(quantity, value)
    return self.histogrammar(aggregator)
79+
80+
def IrregularlyBin(self, thresholds, quantity, value=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count()):
    """Build an ``IrregularlyBin`` primitive and run it on this DataFrame.

    ``thresholds`` and ``quantity`` are forwarded positionally; ``value``
    and ``nanflow`` are forwarded by keyword and default to ``Count()``
    instances created when this function is defined.

    Returns the result of ``self.histogrammar``.
    """
    # Forward the caller's sub-aggregators. The previous version built fresh
    # Count() objects here, silently discarding any ``value``/``nanflow``
    # the caller supplied (unlike CentrallyBin, Stack and SparselyBin).
    aggregator = histogrammar.primitives.irregularlybin.IrregularlyBin(
        thresholds, quantity, value=value, nanflow=nanflow)
    return self.histogrammar(aggregator)
82+
83+
def Minimize(self, quantity):
    """Build a ``Minimize`` primitive from ``quantity`` and run it.

    The primitive comes from ``histogrammar.primitives.minmax``.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.minmax.Minimize(quantity)
    return self.histogrammar(aggregator)
85+
86+
def Maximize(self, quantity):
    """Build a ``Maximize`` primitive from ``quantity`` and run it.

    The primitive comes from ``histogrammar.primitives.minmax``.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.minmax.Maximize(quantity)
    return self.histogrammar(aggregator)
88+
89+
def Select(self, quantity, cut=histogrammar.primitives.count.Count()):
    """Build a ``Select`` primitive and run it on this DataFrame.

    ``quantity`` and ``cut`` are forwarded positionally; ``cut`` defaults
    to a ``Count()`` created when this function is defined.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.select.Select(quantity, cut)
    return self.histogrammar(aggregator)
91+
92+
def SparselyBin(self, binWidth, quantity, value=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count(), origin=0.0):
    """Build a ``SparselyBin`` primitive and run it on this DataFrame.

    All five arguments are forwarded positionally, in signature order.
    ``value`` and ``nanflow`` default to ``Count()`` instances created when
    this function is defined; ``origin`` defaults to ``0.0``.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.sparselybin.SparselyBin(
        binWidth, quantity, value, nanflow, origin)
    return self.histogrammar(aggregator)
94+
95+
def Stack(self, bins, quantity, value=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count()):
    """Build a ``Stack`` primitive and run it on this DataFrame.

    ``bins``, ``quantity``, ``value`` and ``nanflow`` are forwarded
    positionally; the last two default to ``Count()`` instances created
    when this function is defined.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.stack.Stack(
        bins, quantity, value, nanflow)
    return self.histogrammar(aggregator)
97+
98+
def Sum(self, quantity):
    """Build a ``Sum`` primitive from ``quantity`` and run it.

    Returns the result of ``self.histogrammar``.
    """
    aggregator = histogrammar.primitives.sum.Sum(quantity)
    return self.histogrammar(aggregator)
100+
101+
# The generic entry point is defined as ``hg`` but is exposed on ``df`` under
# the name ``histogrammar``, which is the attribute every wrapper calls.
df.histogrammar = types.MethodType(hg, df)

# Bind each convenience wrapper to this ``df`` instance under the function's
# own name (df.Average, df.Bag, ...), in the same order as before.
for primitive in (
        Average, Bag, Bin, Categorize, CentrallyBin, Label, UntypedLabel,
        Index, Branch, Count, Deviate, Fraction, IrregularlyBin, Minimize,
        Maximize, Select, SparselyBin, Stack, Sum):
    setattr(df, primitive.__name__, types.MethodType(primitive, df))

0 commit comments

Comments
 (0)