1414# See the License for the specific language governing permissions and
1515# limitations under the License.
1616
17+ import numpy as np
1718import math
1819import numbers
1920
@@ -259,7 +260,32 @@ def at(self, index):
@property
def indexes(self):
    """Return the indexes of all filled bins as an ascending list."""
    # Iterating the ``bins`` mapping yields its keys, i.e. the bin indexes.
    filled = list(self.bins)
    filled.sort()
    return filled
264+
@property
def binsMap(self):
    """Return the ``bins`` container itself (index -> sub-aggregator map).

    The very same object is handed back, not a copy.
    """
    mapping = self.bins
    return mapping
269+
@property
def size(self):
    """Return how many entries the ``bins`` container currently holds."""
    n_filled = len(self.bins)
    return n_filled
274+
@property
def keys(self):
    """Return the keys of ``bins`` exactly as ``bins.keys()`` provides them."""
    container = self.bins
    return container.keys()
279+
@property
def values(self):
    """Return the values stored in ``bins`` as a new list."""
    contents = self.bins.values()
    return list(contents)
284+
@property
def keySet(self):
    """Return the keys of ``bins`` collected into a new ``set``."""
    # Iterating the ``bins`` mapping yields its keys.
    return set(self.bins)
263289
264290 def range (self , index ):
265291 """Get the low and high edge of a bin (given by index number)."""
@@ -432,48 +458,76 @@ def _c99StructName(self):
def _numpy(self, data, weights, shape):
    """Fill the sparse bins from array input.

    ``self.quantity`` is evaluated on ``data``. Entries whose quantity is NaN
    are forwarded to ``self.nanflow``; every entry is then mapped to the
    integer bin index ``floor((q - origin) / binWidth)``. Bins that receive
    entries with weight > 0 are created on demand (via ``self.value.zero()``)
    and filled through their own ``_numpy`` method.

    :param data: input handed to ``self.quantity`` and to the sub-aggregators
    :param weights: a scalar or a numpy array of weights
    :param shape: shape descriptor forwarded to the check helpers and to the
        sub-aggregators
    """
    q = self.quantity(data)
    self._checkNPQuantity(q, shape)

    # Remember whether the *incoming* weights are all exactly one; this must be
    # decided before ``_makeNPWeights`` replaces ``weights`` below.
    all_weights_one = bool(
        (isinstance(weights, (float, int)) and weights == 1)
        or (isinstance(weights, np.ndarray) and np.all(weights == 1))
    )
    self._checkNPWeights(weights, shape)
    weights = self._makeNPWeights(weights, shape)
    newentries = weights.sum()

    # ``selection`` marks the entries that are NOT NaN; zeroing their weights
    # leaves only the NaN entries to be counted by ``nanflow``.
    selection = np.isnan(q)
    np.bitwise_not(selection, selection)  # invert selection
    subweights = weights.copy()
    subweights[selection] = 0.0
    self.nanflow._numpy(data, subweights, shape)
    # restore the full weights (the multi-dim branch below relies on this)
    subweights[:] = weights

    # switch to float here like in bin.py else numpy throws
    # TypeError on trivial integer cases such as:
    # >>> q = np.array([1,2,3,4])
    # >>> np.divide(q,1,q)
    # >>> np.floor(q,q)
    q = np.array(q, dtype=np.float64)
    neginfs = np.isneginf(q)
    posinfs = np.isposinf(q)

    # in-place conversion of the quantity into integer bin indexes
    np.subtract(q, self.origin, q)
    np.divide(q, self.binWidth, q)
    np.floor(q, q)
    q = np.array(q, dtype=np.int64)
    q[neginfs] = LONG_MINUSINF
    q[posinfs] = LONG_PLUSINF

    # only entries that carry a positive weight decide which bins are touched
    selected = q[weights > 0.0]

    # used below. bit expensive, so do here once
    n_dim = self.n_dim

    if n_dim == 1 and all_weights_one and isinstance(self.value, Count):
        # special case: filling single array where all weights are 1
        # (use fast np.unique that returns counts)
        uniques, counts = np.unique(selected, return_counts=True)
        for count, index in zip(counts, uniques):
            # NOTE(review): LONG_NAN is presumably the index NaN quantities
            # end up with after the int64 cast -- confirm against its definition.
            if index != LONG_NAN:
                sub = self.bins.get(index)
                if sub is None:
                    sub = self.value.zero()
                    self.bins[index] = sub
                # pass counts directly to Count object
                sub._numpy(None, count, [None])
    else:
        # all other cases ...
        # Builtin ``bool`` instead of ``np.bool``: that alias was deprecated in
        # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        selection = np.empty(q.shape, dtype=bool)
        for index in np.unique(selected):
            if index != LONG_NAN:
                sub = self.bins.get(index)
                if sub is None:
                    sub = self.value.zero()
                    self.bins[index] = sub
                if n_dim == 1:
                    # passing on the full array is faster for one-dim histograms
                    np.not_equal(q, index, selection)
                    subweights[:] = weights
                    subweights[selection] = 0.0
                    sub._numpy(data, subweights, shape)
                else:
                    # in practice passing on sliced arrays is faster for multi-dim histograms
                    np.equal(q, index, selection)
                    sub._numpy(data[selection], subweights[selection], [np.sum(selection)])

    # no possibility of exception from here on out (for rollback)
    self.entries += float(newentries)
@@ -615,12 +669,12 @@ def __hash__(self):
615669
@property
def n_bins(self):
    """Return the number of filled bins (the length of ``bins``).

    Named consistently with SparselyBin and Categorize.
    """
    filled = self.bins
    return len(filled)
620674
621675 def num_bins (self , low = None , high = None ):
622676 """
623- Returns number of bins
677+ Returns number of bins from low to high, including unfilled
624678
625679 Possible to set range with low and high params
626680
@@ -629,7 +683,6 @@ def num_bins(self, low=None, high=None):
629683 :returns: number of bins in range
630684 :rtype: int
631685 """
632- import numpy as np
633686 # sparse hist not filled
634687 if self .minBin is None or self .maxBin is None :
635688 return 0
@@ -672,7 +725,6 @@ def bin_edges(self, low=None, high=None):
672725 :returns: numpy array with bin edges for selected range
673726 :rtype: numpy.array
674727 """
675- import numpy as np
676728 # sparse hist not filled
677729 if self .minBin is None or self .maxBin is None :
678730 return np .array ([self .origin , self .origin + 1 ])
@@ -715,7 +767,6 @@ def bin_entries(self, low=None, high=None, xvalues=[]):
715767 :returns: numpy array with numbers of entries for selected bins
716768 :rtype: numpy.array
717769 """
718- import numpy as np
719770 # sparse hist not filled
720771 if self .minBin is None or self .maxBin is None :
721772 return np .array ([])
@@ -757,10 +808,8 @@ def bin_centers(self, low=None, high=None):
757808 :returns: numpy array with bin centers for selected range
758809 :rtype: numpy.array
759810 """
760- import numpy as np
761811 bin_edges = self .bin_edges (low , high )
762- centers = [(bin_edges [i ] + bin_edges [i + 1 ]) / 2. for i in range (len (bin_edges ) - 1 )]
763- return np .array (centers )
812+ return (bin_edges [:- 1 ] + bin_edges [1 :]) / 2
764813
765814 @property
766815 def mpv (self ):
0 commit comments