124
124
from pandas .core .dtypes .generic import (
125
125
ABCDataFrame ,
126
126
ABCDatetimeIndex ,
127
+ ABCIntervalIndex ,
127
128
ABCMultiIndex ,
128
129
ABCPeriodIndex ,
129
130
ABCSeries ,
@@ -3492,8 +3493,6 @@ def _intersection(self, other: Index, sort: bool = False):
3492
3493
and other .is_monotonic_increasing
3493
3494
and self ._can_use_libjoin
3494
3495
and other ._can_use_libjoin
3495
- and not isinstance (self , ABCMultiIndex )
3496
- and not isinstance (other , ABCMultiIndex )
3497
3496
):
3498
3497
try :
3499
3498
res_indexer , indexer , _ = self ._inner_indexer (other )
@@ -4632,28 +4631,13 @@ def join(
4632
4631
4633
4632
_validate_join_method (how )
4634
4633
4635
- if not self .is_unique and not other .is_unique :
4636
- return self ._join_non_unique (other , how = how , sort = sort )
4637
- elif not self .is_unique or not other .is_unique :
4638
- if self .is_monotonic_increasing and other .is_monotonic_increasing :
4639
- # Note: 2023-08-15 we *do* have tests that get here with
4640
- # Categorical, string[python] (can use libjoin)
4641
- # and Interval (cannot)
4642
- if self ._can_use_libjoin and other ._can_use_libjoin :
4643
- # otherwise we will fall through to _join_via_get_indexer
4644
- # GH#39133
4645
- # go through object dtype for ea till engine is supported properly
4646
- return self ._join_monotonic (other , how = how )
4647
- else :
4648
- return self ._join_non_unique (other , how = how , sort = sort )
4649
- elif (
4650
- # GH48504: exclude MultiIndex to avoid going through MultiIndex._values
4651
- self .is_monotonic_increasing
4634
+ if (
4635
+ not isinstance (self .dtype , CategoricalDtype )
4636
+ and self .is_monotonic_increasing
4652
4637
and other .is_monotonic_increasing
4653
4638
and self ._can_use_libjoin
4654
4639
and other ._can_use_libjoin
4655
- and not isinstance (self , ABCMultiIndex )
4656
- and not isinstance (self .dtype , CategoricalDtype )
4640
+ and (self .is_unique or other .is_unique )
4657
4641
):
4658
4642
# Categorical is monotonic if data are ordered as categories, but join can
4659
4643
# not handle data that are not lexicographically monotonic (GH#38502)
@@ -4662,6 +4646,8 @@ def join(
4662
4646
except TypeError :
4663
4647
# object dtype; non-comparable objects
4664
4648
pass
4649
+ elif not self .is_unique or not other .is_unique :
4650
+ return self ._join_non_unique (other , how = how , sort = sort )
4665
4651
4666
4652
return self ._join_via_get_indexer (other , how , sort )
4667
4653
@@ -4797,6 +4783,9 @@ def _join_non_unique(
4797
4783
join_idx = self .take (left_idx )
4798
4784
right = other .take (right_idx )
4799
4785
join_index = join_idx .putmask (mask , right )
4786
+ if isinstance (join_index , ABCMultiIndex ) and how == "outer" :
4787
+ # test_join_index_levels
4788
+ join_index = join_index ._sort_levels_monotonic ()
4800
4789
return join_index , left_idx , right_idx
4801
4790
4802
4791
@final
@@ -5042,10 +5031,10 @@ def _can_use_libjoin(self) -> bool:
5042
5031
or isinstance (self ._values , (ArrowExtensionArray , BaseMaskedArray ))
5043
5032
or self .dtype == "string[python]"
5044
5033
)
5045
- # For IntervalIndex, the conversion to numpy converts
5046
- # to object dtype, which negates the performance benefit of libjoin
5047
- # TODO: exclude RangeIndex and MultiIndex as these also make copies?
5048
- return not isinstance (self . dtype , IntervalDtype )
5034
+ # Exclude index types where the conversion to numpy converts to object dtype,
5035
+ # which negates the performance benefit of libjoin
5036
+ # TODO: exclude RangeIndex? Seems to break test_concat_datetime_timezone
5037
+ return not isinstance (self , ( ABCIntervalIndex , ABCMultiIndex ) )
5049
5038
5050
5039
# --------------------------------------------------------------------
5051
5040
# Uncategorized Methods
@@ -5180,8 +5169,7 @@ def _get_join_target(self) -> np.ndarray:
5180
5169
# present
5181
5170
return self ._values .to_numpy ()
5182
5171
5183
- # TODO: exclude ABCRangeIndex, ABCMultiIndex cases here as those create
5184
- # copies.
5172
+ # TODO: exclude ABCRangeIndex case here as it copies
5185
5173
target = self ._get_engine_target ()
5186
5174
if not isinstance (target , np .ndarray ):
5187
5175
raise ValueError ("_can_use_libjoin should return False." )
0 commit comments