|
14 | 14 | # See the License for the specific language governing permissions and |
15 | 15 | # limitations under the License. |
16 | 16 |
|
| 17 | +import json |
| 18 | +import types |
| 19 | + |
17 | 20 | import histogrammar.primitives.average |
18 | 21 | import histogrammar.primitives.bag |
19 | 22 | import histogrammar.primitives.bin |
|
29 | 32 | import histogrammar.primitives.sparselybin |
30 | 33 | import histogrammar.primitives.stack |
31 | 34 | import histogrammar.primitives.sum |
| 35 | +from histogrammar.defs import Factory |
32 | 36 |
|
33 | 37 | def addMethods(df): |
34 | | - def histogrammar(self, h): |
35 | | - converter = self.df._sc._jvm.org.dianahep.histogrammar.sparksql.pyspark.AggregatorConverter() |
36 | | - agg = h._sparksql(self.df._sc._jvm, converter) |
37 | | - result = converter.histogrammar(self.df._jdf, agg) |
38 | | - return Factory.fromJson(jsonlib.loads(result.toJsonString())) |
| 38 | + def hg(self, h): |
| 39 | + converter = self._sc._jvm.org.dianahep.histogrammar.sparksql.pyspark.AggregatorConverter() |
| 40 | + agg = h._sparksql(self._sc._jvm, converter) |
| 41 | + result = converter.histogrammar(self._jdf, agg) |
| 42 | + return Factory.fromJson(json.loads(result.toJsonString())) |
39 | 43 |
|
40 | 44 | def Average(self, quantity): |
41 | 45 | return self.histogrammar(histogrammar.primitives.average.Average(quantity)) |
42 | 46 |
|
43 | 47 | def Bag(self, quantity, range): |
44 | 48 | return self.histogrammar(histogrammar.primitives.bag.Bag(quantity, range)) |
45 | 49 |
|
46 | | - # def Bin(self, num, low, high, quantity, value=histogrammar.primitives.count.Count(), underflow=histogrammar.primitives.count.Count(), overflow=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count()): |
47 | | - # return self.histogrammar() |
48 | | - |
49 | | - |
50 | | - |
51 | | - |
52 | | - # df.histogrammar = types.MethodType(histogrammar, df) |
53 | | - |
54 | | - # hg.Average = types.MethodType(Average , df) |
55 | | - # hg.Bag = types.MethodType(Bag , df) |
56 | | - # hg.Bin = types.MethodType(Bin , df) |
57 | | - # hg.Categorize = types.MethodType(Categorize , df) |
58 | | - # hg.CentrallyBin = types.MethodType(CentrallyBin , df) |
59 | | - # hg.Label = types.MethodType(Label , df) |
60 | | - # hg.UntypedLabel = types.MethodType(UntypedLabel , df) |
61 | | - # hg.Index = types.MethodType(Index , df) |
62 | | - # hg.Branch = types.MethodType(Branch , df) |
63 | | - # hg.Count = types.MethodType(Count , df) |
64 | | - # hg.Deviate = types.MethodType(Deviate , df) |
65 | | - # hg.Fraction = types.MethodType(Fraction , df) |
66 | | - # hg.IrregularlyBin = types.MethodType(IrregularlyBin , df) |
67 | | - # hg.Minimize = types.MethodType(Minimize , df) |
68 | | - # hg.Maximize = types.MethodType(Maximize , df) |
69 | | - # hg.Select = types.MethodType(Select , df) |
70 | | - # hg.SparselyBin = types.MethodType(SparselyBin , df) |
71 | | - # hg.Stack = types.MethodType(Stack , df) |
72 | | - # hg.Sum = types.MethodType(Sum , df) |
| 50 | + def Bin(self, num, low, high, quantity, value=histogrammar.primitives.count.Count(), underflow=histogrammar.primitives.count.Count(), overflow=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count()): |
| 51 | + return self.histogrammar(histogrammar.primitives.bin.Bin(num, low, high, quantity, value, underflow, overflow, nanflow)) |
| 52 | + |
| 53 | + def Categorize(self, quantity, value=histogrammar.primitives.count.Count()): |
| 54 | + return self.histogrammar(histogrammar.primitives.categorize.Categorize(quantity, value)) |
| 55 | + |
| 56 | + def CentrallyBin(self, bins, quantity, value=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count()): |
| 57 | + return self.histogrammar(histogrammar.primitives.centrallybin.CentrallyBin(bins, quantity, value, nanflow)) |
| 58 | + |
| 59 | + def Label(self, **pairs): |
| 60 | + return self.histogrammar(histogrammar.primitives.collection.Label(**pairs)) |
| 61 | + |
| 62 | + def UntypedLabel(self, **pairs): |
| 63 | + return self.histogrammar(histogrammar.primitives.collection.UntypedLabel(**pairs)) |
| 64 | + |
| 65 | + def Index(self, *values): |
| 66 | + return self.histogrammar(histogrammar.primitives.collection.Index(*values)) |
| 67 | + |
| 68 | + def Branch(self, *values): |
| 69 | + return self.histogrammar(histogrammar.primitives.collection.Branch(*values)) |
| 70 | + |
| 71 | + def Count(self): # TODO: handle transform |
| 72 | + return self.histogrammar(histogrammar.primitives.count.Count()) |
| 73 | + |
| 74 | + def Deviate(self, quantity): |
| 75 | + return self.histogrammar(histogrammar.primitives.deviate.Deviate(quantity)) |
| 76 | + |
| 77 | + def Fraction(self, quantity, value=histogrammar.primitives.count.Count()): |
| 78 | + return self.histogrammar(histogrammar.primitives.fraction.Fraction(quantity, value)) |
| 79 | + |
| 80 | + def IrregularlyBin(self, thresholds, quantity, value=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count()): |
| 81 | + return self.histogrammar(histogrammar.primitives.irregularlybin.IrregularlyBin(thresholds, quantity, value=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count())) |
| 82 | + |
| 83 | + def Minimize(self, quantity): |
| 84 | + return self.histogrammar(histogrammar.primitives.minmax.Minimize(quantity)) |
| 85 | + |
| 86 | + def Maximize(self, quantity): |
| 87 | + return self.histogrammar(histogrammar.primitives.minmax.Maximize(quantity)) |
| 88 | + |
| 89 | + def Select(self, quantity, cut=histogrammar.primitives.count.Count()): |
| 90 | + return self.histogrammar(histogrammar.primitives.select.Select(quantity, cut)) |
| 91 | + |
| 92 | + def SparselyBin(self, binWidth, quantity, value=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count(), origin=0.0): |
| 93 | + return self.histogrammar(histogrammar.primitives.sparselybin.SparselyBin(binWidth, quantity, value, nanflow, origin)) |
| 94 | + |
| 95 | + def Stack(self, bins, quantity, value=histogrammar.primitives.count.Count(), nanflow=histogrammar.primitives.count.Count()): |
| 96 | + return self.histogrammar(histogrammar.primitives.stack.Stack(bins, quantity, value, nanflow)) |
| 97 | + |
| 98 | + def Sum(self, quantity): |
| 99 | + return self.histogrammar(histogrammar.primitives.sum.Sum(quantity)) |
| 100 | + |
| 101 | + df.histogrammar = types.MethodType(hg, df) |
| 102 | + |
| 103 | + df.Average = types.MethodType(Average , df) |
| 104 | + df.Bag = types.MethodType(Bag , df) |
| 105 | + df.Bin = types.MethodType(Bin , df) |
| 106 | + df.Categorize = types.MethodType(Categorize , df) |
| 107 | + df.CentrallyBin = types.MethodType(CentrallyBin , df) |
| 108 | + df.Label = types.MethodType(Label , df) |
| 109 | + df.UntypedLabel = types.MethodType(UntypedLabel , df) |
| 110 | + df.Index = types.MethodType(Index , df) |
| 111 | + df.Branch = types.MethodType(Branch , df) |
| 112 | + df.Count = types.MethodType(Count , df) |
| 113 | + df.Deviate = types.MethodType(Deviate , df) |
| 114 | + df.Fraction = types.MethodType(Fraction , df) |
| 115 | + df.IrregularlyBin = types.MethodType(IrregularlyBin , df) |
| 116 | + df.Minimize = types.MethodType(Minimize , df) |
| 117 | + df.Maximize = types.MethodType(Maximize , df) |
| 118 | + df.Select = types.MethodType(Select , df) |
| 119 | + df.SparselyBin = types.MethodType(SparselyBin , df) |
| 120 | + df.Stack = types.MethodType(Stack , df) |
| 121 | + df.Sum = types.MethodType(Sum , df) |
0 commit comments