@@ -56,18 +56,14 @@ def check_results(df_np, df_da, expected_results_dict):
56
56
df_np [col ], expected_results_dict [col ], equal_nan = True
57
57
).all ()
58
58
59
- # dask case
60
- assert isinstance (df_da , dd .DataFrame )
61
- df_da = df_da .compute ()
62
- assert isinstance (df_da , pd .DataFrame )
63
-
64
- # numpy results equal dask results
65
- # zone column
66
- assert (df_np ['zone' ] == df_da ['zone' ]).all ()
59
+ if df_da is not None :
60
+ # dask case
61
+ assert isinstance (df_da , dd .DataFrame )
62
+ df_da = df_da .compute ()
63
+ assert isinstance (df_da , pd .DataFrame )
67
64
68
- assert (df_np .columns == df_da .columns ).all ()
69
- for col in df_np .columns [1 :]:
70
- assert np .isclose (df_np [col ], df_da [col ], equal_nan = True ).all ()
65
+ # numpy results equal dask results, ignoring their indexes
66
+ assert np .array_equal (df_np .values , df_da .values , equal_nan = True )
71
67
72
68
73
69
def test_stats ():
@@ -93,7 +89,27 @@ def test_stats():
93
89
df_da = stats (zones = zones_da , values = values_da )
94
90
check_results (df_np , df_da , default_stats_results )
95
91
96
- # ---- custom stats ----
92
+ # expected results
93
+ stats_results_zone_0_3 = {
94
+ 'zone' : [0 , 3 ],
95
+ 'mean' : [0 , 2.4 ],
96
+ 'max' : [0 , 3 ],
97
+ 'min' : [0 , 0 ],
98
+ 'sum' : [0 , 12 ],
99
+ 'std' : [0 , 1.2 ],
100
+ 'var' : [0 , 1.44 ],
101
+ 'count' : [5 , 5 ]
102
+ }
103
+
104
+ # numpy case
105
+ df_np_zone_0_3 = stats (zones = zones_np , values = values_np , zone_ids = [0 , 3 ])
106
+
107
+ # dask case
108
+ df_da_zone_0_3 = stats (zones = zones_da , values = values_da , zone_ids = [0 , 3 ])
109
+
110
+ check_results (df_np_zone_0_3 , df_da_zone_0_3 , stats_results_zone_0_3 )
111
+
112
+ # ---- custom stats (NumPy only) ----
97
113
# expected results
98
114
custom_stats_results = {
99
115
'zone' : [1 , 2 ],
@@ -115,13 +131,10 @@ def _range(values):
115
131
# numpy case
116
132
df_np = stats (
117
133
zones = zones_np , values = values_np , stats_funcs = custom_stats ,
118
- zone_ids = [1 , 2 ], nodata_zones = 0 , nodata_values = 0
134
+ zone_ids = [1 , 2 ], nodata_values = 0
119
135
)
120
136
# dask case
121
- df_da = stats (
122
- zones = zones_da , values = values_da , stats_funcs = custom_stats ,
123
- zone_ids = [1 , 2 ], nodata_zones = 0 , nodata_values = 0
124
- )
137
+ df_da = None
125
138
check_results (df_np , df_da , custom_stats_results )
126
139
127
140
0 commit comments