Skip to content

Commit 56e9c90

Browse files
committed
Use a naive sparse histogram.
1 parent 0cd372a commit 56e9c90

File tree

2 files changed

+62
-50
lines changed

2 files changed

+62
-50
lines changed
+45-31
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,37 @@
11
"""Sparse 1-D histogram of healpix pixel counts."""
22

33
import numpy as np
4-
from scipy.sparse import csc_array, load_npz, save_npz, sparray
54

65
import hats.pixel_math.healpix_shim as hp
76

87

98
class SparseHistogram:
10-
"""Wrapper around scipy's sparse array."""
9+
"""Wrapper around a naive sparse array, that is just non-zero indexes and counts."""
1110

12-
def __init__(self, sparse_array):
13-
if not isinstance(sparse_array, sparray):
14-
raise ValueError("The sparse array must be a scipy sparse array.")
15-
if sparse_array.format != "csc":
16-
raise ValueError("The sparse array must be a Compressed Sparse Column array.")
17-
self.sparse_array = sparse_array
18-
19-
def add(self, other):
20-
"""Add in another sparse histogram, updating this wrapper's array.
21-
22-
Args:
23-
other (SparseHistogram): the wrapper containing the addend
24-
"""
25-
if not isinstance(other, SparseHistogram):
26-
raise ValueError("Both addends should be SparseHistogram.")
27-
if self.sparse_array.shape != other.sparse_array.shape:
28-
raise ValueError(
29-
"The histogram partials have incompatible sizes due to different healpix orders."
30-
)
31-
self.sparse_array += other.sparse_array
11+
def __init__(self, indexes, counts, order):
12+
if len(indexes) != len(counts):
13+
raise ValueError("indexes and counts must be same length")
14+
self.indexes = indexes
15+
self.counts = counts
16+
self.order = order
3217

3318
def to_array(self):
3419
"""Convert the sparse array to a dense numpy array.
3520
3621
Returns:
3722
dense 1-d numpy array.
3823
"""
39-
return self.sparse_array.toarray()[0]
24+
dense = np.zeros(hp.order2npix(self.order), dtype=np.int64)
25+
dense[self.indexes] = self.counts
26+
return dense
4027

4128
def to_file(self, file_name):
4229
"""Persist the sparse array to disk.
4330
4431
NB: this saves as a sparse array, and so will likely have lower space requirements
4532
than saving the corresponding dense 1-d numpy array.
4633
"""
47-
save_npz(file_name, self.sparse_array)
34+
np.savez(file_name, indexes=self.indexes, counts=self.counts, order=self.order)
4835

4936
def to_dense_file(self, file_name):
5037
"""Persist the DENSE array to disk as a numpy array."""
@@ -61,8 +48,7 @@ def make_empty(cls, healpix_order=10):
6148
Returns:
6249
new sparse histogram
6350
"""
64-
histo = csc_array((1, hp.order2npix(healpix_order)), dtype=np.int64)
65-
return cls(histo)
51+
return cls([], [], healpix_order)
6652

6753
@classmethod
6854
def make_from_counts(cls, indexes, counts_at_indexes, healpix_order=10):
@@ -86,9 +72,7 @@ def make_from_counts(cls, indexes, counts_at_indexes, healpix_order=10):
8672
Returns:
8773
new sparse histogram
8874
"""
89-
row = np.array(np.zeros(len(indexes), dtype=np.int64))
90-
histo = csc_array((counts_at_indexes, (row, indexes)), shape=(1, hp.order2npix(healpix_order)))
91-
return cls(histo)
75+
return cls(indexes, counts_at_indexes, healpix_order)
9276

9377
@classmethod
9478
def from_file(cls, file_name):
@@ -97,5 +81,35 @@ def from_file(cls, file_name):
9781
Returns:
9882
new sparse histogram
9983
"""
100-
histo = load_npz(file_name)
101-
return cls(histo)
84+
npzfile = np.load(file_name)
85+
return cls(npzfile["indexes"], npzfile["counts"], npzfile["order"])
86+
87+
88+
class HistogramAggregator:
89+
"""Utility for aggregating sparse histograms."""
90+
91+
def __init__(self, order):
92+
self.order = order
93+
self.full_histogram = np.zeros(hp.order2npix(order), dtype=np.int64)
94+
95+
def add(self, other):
96+
"""Add in another sparse histogram, updating this wrapper's array.
97+
98+
Args:
99+
other (SparseHistogram): the wrapper containing the addend
100+
"""
101+
if not isinstance(other, SparseHistogram):
102+
raise ValueError("Both addends should be SparseHistogram.")
103+
if self.order != other.order:
104+
raise ValueError(
105+
"The histogram partials have incompatible sizes due to different healpix orders."
106+
)
107+
if len(other.indexes) == 0:
108+
return
109+
self.full_histogram[other.indexes] += other.counts
110+
111+
def to_sparse(self):
112+
"""Return a SparseHistogram, based on non-zero values in this aggregation."""
113+
indexes = self.full_histogram.nonzero()[0]
114+
counts = self.full_histogram[indexes]
115+
return SparseHistogram(indexes, counts, self.order)

tests/hats/pixel_math/test_sparse_histogram.py

+17-19
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@
44
import numpy.testing as npt
55
import pytest
66
from numpy import frombuffer
7-
from scipy.sparse import csr_array
87

98
import hats.pixel_math.healpix_shim as hp
10-
from hats.pixel_math.sparse_histogram import SparseHistogram
9+
from hats.pixel_math.sparse_histogram import HistogramAggregator, SparseHistogram
1110

1211

1312
def test_make_empty():
@@ -42,39 +41,38 @@ def test_add_same_order():
4241

4342
partial_histogram_right = SparseHistogram.make_from_counts([10, 11], [4, 15], 0)
4443

45-
partial_histogram_left.add(partial_histogram_right)
44+
total_histogram = HistogramAggregator(0)
45+
total_histogram.add(partial_histogram_left)
46+
total_histogram.add(partial_histogram_right)
4647

4748
expected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 146]
48-
npt.assert_array_equal(partial_histogram_left.to_array(), expected)
49+
npt.assert_array_equal(total_histogram.full_histogram, expected)
50+
51+
sparse = total_histogram.to_sparse()
52+
npt.assert_array_equal(sparse.indexes, [10, 11])
53+
npt.assert_array_equal(sparse.counts, [4, 146])
54+
npt.assert_array_equal(sparse.order, 0)
4955

5056

5157
def test_add_different_order():
5258
"""Test that we can NOT add histograms of different healpix orders."""
5359
partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0)
54-
5560
partial_histogram_right = SparseHistogram.make_from_counts([10, 11], [4, 15], 1)
5661

62+
total_histogram = HistogramAggregator(0)
63+
total_histogram.add(partial_histogram_left)
5764
with pytest.raises(ValueError, match="partials have incompatible sizes"):
58-
partial_histogram_left.add(partial_histogram_right)
65+
total_histogram.add(partial_histogram_right)
5966

6067

6168
def test_add_different_type():
6269
"""Test that we can NOT add histograms of different healpix orders."""
6370
partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0)
6471

72+
total_histogram = HistogramAggregator(0)
73+
total_histogram.add(partial_histogram_left)
6574
with pytest.raises(ValueError, match="addends should be SparseHistogram"):
66-
partial_histogram_left.add(5)
75+
total_histogram.add(5)
6776

6877
with pytest.raises(ValueError, match="addends should be SparseHistogram"):
69-
partial_histogram_left.add([1, 2, 3, 4, 5])
70-
71-
72-
def test_init_bad_inputs():
73-
"""Test that the SparseHistogram type requires a compressed sparse column
74-
as its sole `sparse_array` argument."""
75-
with pytest.raises(ValueError, match="must be a scipy sparse array"):
76-
SparseHistogram(5)
77-
78-
with pytest.raises(ValueError, match="must be a Compressed Sparse Column"):
79-
row_sparse_array = csr_array((1, 12), dtype=np.int64)
80-
SparseHistogram(row_sparse_array)
78+
total_histogram.add(([1, 2, 3, 4, 5], [1, 2, 3, 4, 5], 0))

0 commit comments

Comments
 (0)