|
4 | 4 | The main user-facing function of this module is `pileup`; it performs pileups using
|
5 | 5 | snippers and other functions defined in the module. The concept is the following:
|
6 | 6 |
|
7 |
| -- First, the provided features are annotated with the regions from a view (or simply |
| 7 | +- First, the provided features are annotated with the regions from a view (or simply |
8 | 8 | whole chromosomes, if no view is provided). Each feature is assigned to the region that
|
9 | 9 | contains it, or the one with the largest overlap.
|
10 |
| -- Then the features are expanded using the `flank` argument, and aligned to the bins |
| 10 | +- Then the features are expanded using the `flank` argument, and aligned to the bins |
11 | 11 | of the cooler
|
12 |
| -- Depending on the requested operation (whether the normalization to expected is |
| 12 | +- Depending on the requested operation (whether the normalization to expected is |
13 | 13 | required), the appropriate snipper object is created
|
14 |
| -- A snipper can `select` a particular region of a genome-wide matrix, meaning it |
| 14 | +- A snipper can `select` a particular region of a genome-wide matrix, meaning it |
15 | 15 | stores its sparse representation in memory. This could be whole chromosomes or
|
16 | 16 | chromosome arms, for example
|
17 |
| -- A snipper can `snip` a small area of a selected region, meaning it will extract |
| 17 | +- A snipper can `snip` a small area of a selected region, meaning it will extract |
18 | 18 | and return a dense representation of this area
|
19 |
| -- For each region present, it is first `select`ed, and then all features within it are |
| 19 | +- For each region present, it is first `select`ed, and then all features within it are |
20 | 20 | `snip`ped, creating a stack: a 3D array containing all snippets for this region
|
21 |
| -- For features that are not assigned to any region, an empty snippet is returned |
22 |
| -- All per-region stacks are then combined into one, which then can be averaged to create |
| 21 | +- For features that are not assigned to any region, an empty snippet is returned |
| 22 | +- All per-region stacks are then combined into one, which then can be averaged to create |
23 | 23 | a single pileup
|
24 |
| -- The order of snippets in the stack matches the order of features, this way the stack |
| 24 | +- The order of snippets in the stack matches the order of features, this way the stack |
25 | 25 | can also be used for analysis of any subsets of original features
|
26 | 26 |
|
27 | 27 | This procedure achieves a good tradeoff between speed and RAM. Extracting each
|
@@ -390,7 +390,8 @@ def select(self, region1, region2):
|
390 | 390 | if self.cooler_opts["sparse"]:
|
391 | 391 | matrix = matrix.tocsr()
|
392 | 392 | if self.min_diag is not None:
|
393 |
| - diags = np.arange(np.diff(self.clr.extent(region1_coords)), dtype=np.int32) |
| 393 | + lo, hi = self.clr.extent(region1_coords) |
| 394 | + diags = np.arange(hi - lo, dtype=np.int32) |
394 | 395 | self.diag_indicators[region1] = LazyToeplitz(-diags, diags)
|
395 | 396 | return matrix
|
396 | 397 |
|
@@ -600,7 +601,8 @@ def select(self, region1, region2):
|
600 | 601 | .values
|
601 | 602 | )
|
602 | 603 | if self.min_diag is not None:
|
603 |
| - diags = np.arange(np.diff(self.clr.extent(region1_coords)), dtype=np.int32) |
| 604 | + lo, hi = self.clr.extent(region1_coords) |
| 605 | + diags = np.arange(hi - lo, dtype=np.int32) |
604 | 606 | self.diag_indicators[region1] = LazyToeplitz(-diags, diags)
|
605 | 607 | return matrix
|
606 | 608 |
|
@@ -770,7 +772,8 @@ def select(self, region1, region2):
|
770 | 772 | .values
|
771 | 773 | )
|
772 | 774 | if self.min_diag is not None:
|
773 |
| - diags = np.arange(np.diff(self.clr.extent(region1_coords)), dtype=np.int32) |
| 775 | + lo, hi = self.clr.extent(region1_coords) |
| 776 | + diags = np.arange(hi - lo, dtype=np.int32) |
774 | 777 | self.diag_indicators[region1] = LazyToeplitz(-diags, diags)
|
775 | 778 | return self._expected
|
776 | 779 |
|
@@ -861,7 +864,7 @@ def pileup(
|
861 | 864 | map_functor : callable, optional
|
862 | 865 | Map function to dispatch the matrix chunks to workers.
|
863 | 866 | If left unspecified, pool_decorator applies the following defaults: if nproc>1 this defaults to multiprocess.Pool;
|
864 |
| - If nproc=1 this defaults the builtin map. |
| 867 | + If nproc=1 this defaults to the builtin map. |
865 | 868 |
|
866 | 869 | Returns
|
867 | 870 | -------
|
@@ -983,5 +986,5 @@ def pileup(
|
983 | 986 | stack = _pileup(features_df, snipper.select, snipper.snip, map=map_functor)
|
984 | 987 | if feature_type == "bed":
|
985 | 988 | stack = np.fmax(stack, np.transpose(stack, axes=(0, 2, 1)))
|
986 |
| - |
| 989 | + |
987 | 990 | return stack
|
0 commit comments