Skip to content

Commit 6d6d67a

Browse files
committed
1. Use n_elem (instead of n_valid) for calculating count.avg. 2. Store count.avg.smoothed and count.avg.smoothed.agg in the cvd table.
1 parent 2876721 commit 6d6d67a

File tree

2 files changed

+36
-7
lines changed

2 files changed

+36
-7
lines changed

cooltools/api/expected.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,8 @@ def _make_diag_table(n_bins, bad_locs):
256256
)
257257
diags = diags[diags["n_elem"] > 0]
258258

259-
diags = diags.drop("n_elem", axis=1)
259+
# keep diags["n_elem"] for calculating count.avg
260+
# diags = diags.drop("n_elem", axis=1)
260261
return diags.astype(int)
261262

262263

@@ -1009,17 +1010,21 @@ def expected_cis(
10091010

10101011
# calculate actual averages: divide the raw count sum by n_elem and every transformed sum by n_valid:
10111012
for key in chain(["count"], transforms):
1012-
result[f"{key}.avg"] = result[f"{key}.sum"] / result[_NUM_VALID]
1013+
if key == "count":
1014+
result[f"{key}.avg"] = result[f"{key}.sum"] / result["n_elem"]
1015+
else:
1016+
result[f"{key}.avg"] = result[f"{key}.sum"] / result[_NUM_VALID]
1017+
10131018

10141019
# additional smoothing and aggregating options would add columns only, not replace them
10151020
if smooth:
10161021
result_smooth = expected_smoothing.agg_smooth_cvd(
10171022
result,
10181023
sigma_log10=smooth_sigma,
10191024
)
1020-
# add smoothed columns to the result (only balanced for now)
1025+
# add smoothed columns to the result (balanced and raw count)
10211026
result = result.merge(
1022-
result_smooth[["balanced.avg.smoothed", _DIST]],
1027+
result_smooth[["balanced.avg.smoothed", "count.avg.smoothed", _DIST]],
10231028
on=[_REGION1, _REGION2, _DIST],
10241029
how="left",
10251030
)
@@ -1028,10 +1033,10 @@ def expected_cis(
10281033
result,
10291034
groupby=None,
10301035
sigma_log10=smooth_sigma,
1031-
).rename(columns={"balanced.avg.smoothed": "balanced.avg.smoothed.agg"})
1036+
).rename(columns={"balanced.avg.smoothed": "balanced.avg.smoothed.agg", "count.avg.smoothed": "count.avg.smoothed.agg"})
10321037
# add smoothed columns to the result
10331038
result = result.merge(
1034-
result_smooth_agg[["balanced.avg.smoothed.agg", _DIST]],
1039+
result_smooth_agg[["balanced.avg.smoothed.agg", "count.avg.smoothed.agg", _DIST]],
10351040
on=[
10361041
_DIST,
10371042
],

cooltools/sandbox/expected_smoothing.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
"n_contacts": "balanced.sum",
1111
"contact_freq": "balanced.avg",
1212
"smooth_suffix": ".smoothed",
13+
"n_pixels_tot": "n_elem",
14+
"n_contacts_raw": "count.sum",
15+
"contact_freq_raw": "count.avg",
1316
}
1417

1518

@@ -182,6 +185,8 @@ def _smooth_cvd_group(cvd, sigma_log10, window_sigma, points_per_sigma, cols=Non
182185
{
183186
cols["n_pixels"]: "sum",
184187
cols["n_contacts"]: "sum",
188+
cols["n_pixels_tot"]: "sum",
189+
cols["n_contacts_raw"]: "sum",
185190
}
186191
)
187192
.reset_index()
@@ -198,6 +203,18 @@ def _smooth_cvd_group(cvd, sigma_log10, window_sigma, points_per_sigma, cols=Non
198203
points_per_sigma=points_per_sigma,
199204
)
200205

206+
smoothed_raw_sum, smoothed_n_elem = log_smooth(
207+
cvd_smoothed[cols["dist"]].values.astype(np.float64),
208+
[
209+
cvd_smoothed[cols["n_contacts_raw"]].values.astype(np.float64),
210+
cvd_smoothed[cols["n_pixels_tot"]].values.astype(np.float64),
211+
],
212+
sigma_log10=sigma_log10,
213+
window_sigma=window_sigma,
214+
points_per_sigma=points_per_sigma,
215+
)
216+
217+
201218
# cvd_smoothed[cols["contact_freq"]] = cvd_smoothed[cols["n_contacts"]] / cvd_smoothed[cols["n_pixels"]]
202219

203220
cvd_smoothed[cols["n_pixels"] + cols["smooth_suffix"]] = smoothed_n_valid
@@ -207,6 +224,13 @@ def _smooth_cvd_group(cvd, sigma_log10, window_sigma, points_per_sigma, cols=Non
207224
/ cvd_smoothed[cols["n_pixels"] + cols["smooth_suffix"]]
208225
)
209226

227+
cvd_smoothed[cols["n_pixels_tot"] + cols["smooth_suffix"]] = smoothed_n_elem
228+
cvd_smoothed[cols["n_contacts_raw"] + cols["smooth_suffix"]] = smoothed_raw_sum
229+
cvd_smoothed[cols["contact_freq_raw"] + cols["smooth_suffix"]] = (
230+
cvd_smoothed[cols["n_contacts_raw"] + cols["smooth_suffix"]]
231+
/ cvd_smoothed[cols["n_pixels_tot"] + cols["smooth_suffix"]]
232+
)
233+
210234
return cvd_smoothed
211235

212236

@@ -291,7 +315,7 @@ def agg_smooth_cvd(
291315
)
292316

293317
cvd_smoothed.drop(
294-
[cols["n_pixels"], cols["n_contacts"]], axis="columns", inplace=True
318+
[cols["n_pixels"], cols["n_contacts"], cols["n_pixels_tot"], cols["n_contacts_raw"]], axis="columns", inplace=True
295319
)
296320

297321
# cvd = cvd.drop(cols["contact_freq"], axis='columns', errors='ignore')

0 commit comments

Comments
 (0)