|
7 | 7 | from .core import from_geopandas, GeoDataFrame
|
8 | 8 |
|
9 | 9 |
|
def partitions_are_unchanged(part_idxs: np.ndarray, npartitions: int) -> bool:
    """
    Whether selecting these partition indices would result in an identical DataFrame.

    The selection counts as unchanged when it contains exactly ``npartitions``
    indices and those indices are strictly increasing.

    Parameters
    ----------
    part_idxs : array-like of int
        Partition indices that would be selected, in selection order. Any
        sequence accepted by ``np.asarray`` works.
    npartitions : int
        Number of partitions of the collection being selected from.

    Returns
    -------
    bool
        True if the selection would leave the collection as-is, else False.

    Notes
    -----
    Assumes every index is a valid partition position (``0 <= i < npartitions``);
    out-of-range values are not detected here -- TODO confirm callers guarantee this.
    """
    # Coerce so plain lists/tuples work too: on a list, ``a < b`` is a single
    # Python bool without ``.all()``.
    part_idxs = np.asarray(part_idxs)
    if len(part_idxs) != npartitions:
        return False
    # Strictly increasing rules out both duplicates and reordering. Wrap in
    # ``bool`` so the result is a builtin bool rather than ``numpy.bool_``.
    return bool((part_idxs[:-1] < part_idxs[1:]).all())
| 13 | + |
| 14 | + |
10 | 15 | def sjoin(left, right, how="inner", op="intersects"):
|
11 | 16 | """
|
12 | 17 | Spatial join of two GeoDataFrames.
|
@@ -58,33 +63,52 @@ def sjoin(left, right, how="inner", op="intersects"):
|
58 | 63 | how="inner",
|
59 | 64 | op="intersects",
|
60 | 65 | )
|
61 |
| - parts_left = np.asarray(parts.index) |
62 |
| - parts_right = np.asarray(parts["index_right"].values) |
63 |
| - using_spatial_partitions = True |
64 |
| - else: |
65 |
| - # Unknown spatial partitions -> full cartesian (cross) product of all |
66 |
| - # combinations of the partitions of the left and right dataframe |
67 |
| - n_left = left.npartitions |
68 |
| - n_right = right.npartitions |
69 |
| - parts_left = np.repeat(np.arange(n_left), n_right) |
70 |
| - parts_right = np.tile(np.arange(n_right), n_left) |
71 |
| - using_spatial_partitions = False |
| 66 | + parts_left = parts.index.values |
| 67 | + parts_right = parts["index_right"].values |
| 68 | + # Sub-select just the partitions from each input we need---unless we need all of them. |
| 69 | + left_sub = ( |
| 70 | + left |
| 71 | + if partitions_are_unchanged(parts_left, left.npartitions) |
| 72 | + else left.partitions[parts_left] |
| 73 | + ) |
| 74 | + right_sub = ( |
| 75 | + right |
| 76 | + if partitions_are_unchanged(parts_right, right.npartitions) |
| 77 | + else right.partitions[parts_right] |
| 78 | + ) |
| 79 | + |
| 80 | + joined = left_sub.map_partitions( |
| 81 | + geopandas.sjoin, |
| 82 | + right_sub, |
| 83 | + how, |
| 84 | + op, |
| 85 | + enforce_metadata=False, |
| 86 | + transform_divisions=False, |
| 87 | + align_dataframes=False, |
| 88 | + meta=meta, |
| 89 | + ) |
| 90 | + |
| 91 | + # TODO preserve spatial partitions of the output if only left has spatial |
| 92 | + # partitions |
| 93 | + joined.spatial_partitions = [ |
| 94 | + left.spatial_partitions.iloc[l].intersection( |
| 95 | + right.spatial_partitions.iloc[r] |
| 96 | + ) |
| 97 | + for l, r in zip(parts_left, parts_right) |
| 98 | + ] |
| 99 | + return joined |
| 100 | + |
| 101 | + # Unknown spatial partitions -> full cartesian (cross) product of all |
| 102 | + # combinations of the partitions of the left and right dataframe |
| 103 | + n_left = left.npartitions |
| 104 | + n_right = right.npartitions |
| 105 | + parts_left = np.repeat(np.arange(n_left), n_right) |
| 106 | + parts_right = np.tile(np.arange(n_right), n_left) |
72 | 107 |
|
73 | 108 | dsk = {}
|
74 |
| - new_spatial_partitions = [] |
75 | 109 | for i, (l, r) in enumerate(zip(parts_left, parts_right)):
|
76 | 110 | dsk[(name, i)] = (geopandas.sjoin, (left._name, l), (right._name, r), how, op)
|
77 |
| - # TODO preserve spatial partitions of the output if only left has spatial |
78 |
| - # partitions |
79 |
| - if using_spatial_partitions: |
80 |
| - lr = left.spatial_partitions.iloc[l] |
81 |
| - rr = right.spatial_partitions.iloc[r] |
82 |
| - # extent = lr.intersection(rr).buffer(buffer).intersection(lr.union(rr)) |
83 |
| - extent = lr.intersection(rr) |
84 |
| - new_spatial_partitions.append(extent) |
85 | 111 |
|
86 | 112 | divisions = [None] * (len(dsk) + 1)
|
87 | 113 | graph = HighLevelGraph.from_collections(name, dsk, dependencies=[left, right])
|
88 |
| - if not using_spatial_partitions: |
89 |
| - new_spatial_partitions = None |
90 |
| - return GeoDataFrame(graph, name, meta, divisions, new_spatial_partitions) |
| 114 | + return GeoDataFrame(graph, name, meta, divisions, None) |
0 commit comments