1
1
"""Sparse 1-D histogram of healpix pixel counts."""
2
2
3
3
import numpy as np
4
- from scipy .sparse import csc_array , load_npz , save_npz , sparray
5
4
6
5
import hats .pixel_math .healpix_shim as hp
7
6
8
7
9
8
class SparseHistogram :
10
- """Wrapper around scipy's sparse array."""
9
+ """Wrapper around a naive sparse array, that is just non-zero indexes and counts ."""
11
10
12
- def __init__ (self , sparse_array ):
13
- if not isinstance (sparse_array , sparray ):
14
- raise ValueError ("The sparse array must be a scipy sparse array." )
15
- if sparse_array .format != "csc" :
16
- raise ValueError ("The sparse array must be a Compressed Sparse Column array." )
17
- self .sparse_array = sparse_array
18
-
19
- def add (self , other ):
20
- """Add in another sparse histogram, updating this wrapper's array.
21
-
22
- Args:
23
- other (SparseHistogram): the wrapper containing the addend
24
- """
25
- if not isinstance (other , SparseHistogram ):
26
- raise ValueError ("Both addends should be SparseHistogram." )
27
- if self .sparse_array .shape != other .sparse_array .shape :
28
- raise ValueError (
29
- "The histogram partials have incompatible sizes due to different healpix orders."
30
- )
31
- self .sparse_array += other .sparse_array
11
+ def __init__ (self , indexes , counts , order ):
12
+ if len (indexes ) != len (counts ):
13
+ raise ValueError ("indexes and counts must be same length" )
14
+ self .indexes = indexes
15
+ self .counts = counts
16
+ self .order = order
32
17
33
18
def to_array(self):
    """Convert the sparse array to a dense numpy array.

    Counts that share the same index are summed rather than overwritten.

    Returns:
        dense 1-d numpy array.
    """
    dense = np.zeros(hp.order2npix(self.order), dtype=np.int64)
    # Explicit int64 conversion keeps empty python lists usable as
    # index/value arrays (an empty list would otherwise become float64).
    indexes = np.asarray(self.indexes, dtype=np.int64)
    counts = np.asarray(self.counts, dtype=np.int64)
    # np.add.at is unbuffered: repeated indexes accumulate, whereas plain
    # fancy-index assignment would keep only the last value per index.
    np.add.at(dense, indexes, counts)
    return dense
40
27
41
28
def to_file (self , file_name ):
42
29
"""Persist the sparse array to disk.
43
30
44
31
NB: this saves as a sparse array, and so will likely have lower space requirements
45
32
than saving the corresponding dense 1-d numpy array.
46
33
"""
47
- save_npz (file_name , self .sparse_array )
34
+ np . savez (file_name , indexes = self .indexes , counts = self . counts , order = self . order )
48
35
49
36
def to_dense_file (self , file_name ):
50
37
"""Persist the DENSE array to disk as a numpy array."""
@@ -61,8 +48,7 @@ def make_empty(cls, healpix_order=10):
61
48
Returns:
62
49
new sparse histogram
63
50
"""
64
- histo = csc_array ((1 , hp .order2npix (healpix_order )), dtype = np .int64 )
65
- return cls (histo )
51
+ return cls ([], [], healpix_order )
66
52
67
53
@classmethod
68
54
def make_from_counts (cls , indexes , counts_at_indexes , healpix_order = 10 ):
@@ -86,9 +72,7 @@ def make_from_counts(cls, indexes, counts_at_indexes, healpix_order=10):
86
72
Returns:
87
73
new sparse histogram
88
74
"""
89
- row = np .array (np .zeros (len (indexes ), dtype = np .int64 ))
90
- histo = csc_array ((counts_at_indexes , (row , indexes )), shape = (1 , hp .order2npix (healpix_order )))
91
- return cls (histo )
75
+ return cls (indexes , counts_at_indexes , healpix_order )
92
76
93
77
@classmethod
94
78
def from_file (cls , file_name ):
@@ -97,5 +81,35 @@ def from_file(cls, file_name):
97
81
Returns:
98
82
new sparse histogram
99
83
"""
100
- histo = load_npz (file_name )
101
- return cls (histo )
84
+ npzfile = np .load (file_name )
85
+ return cls (npzfile ["indexes" ], npzfile ["counts" ], npzfile ["order" ])
86
+
87
+
88
class HistogramAggregator:
    """Utility for aggregating sparse histograms.

    Keeps one dense int64 array covering every pixel at the given healpix
    order and accumulates SparseHistogram partials into it.
    """

    def __init__(self, order):
        """Create an all-zero aggregation at the given healpix order.

        Args:
            order: healpix order; sets the length of the dense histogram
        """
        self.order = order
        self.full_histogram = np.zeros(hp.order2npix(order), dtype=np.int64)

    def add(self, other):
        """Add in another sparse histogram, updating this aggregator's array.

        Args:
            other (SparseHistogram): the wrapper containing the addend

        Raises:
            ValueError: if ``other`` is not a SparseHistogram, or if it was
                built at a different healpix order
        """
        if not isinstance(other, SparseHistogram):
            raise ValueError("Both addends should be SparseHistogram.")
        if self.order != other.order:
            raise ValueError(
                "The histogram partials have incompatible sizes due to different healpix orders."
            )
        # len() rather than truthiness: indexes may be a numpy array, whose
        # truth value is ambiguous. Nothing to accumulate for an empty partial.
        if len(other.indexes) == 0:
            return
        # Unbuffered add so that an index repeated within ``other`` contributes
        # every one of its counts; ``arr[idx] += vals`` applies only one of them.
        np.add.at(self.full_histogram, other.indexes, other.counts)

    def to_sparse(self):
        """Return a SparseHistogram, based on non-zero values in this aggregation."""
        indexes = self.full_histogram.nonzero()[0]
        counts = self.full_histogram[indexes]
        return SparseHistogram(indexes, counts, self.order)
0 commit comments