From c8ba339cd9e21185105372a27e72d39f83ead9a1 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Tue, 14 Oct 2025 07:54:29 -0400 Subject: [PATCH 01/22] add targeted casting to combine_first --- pandas/core/frame.py | 45 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c41b82bbbc8e..6b0337d06057a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9141,20 +9141,21 @@ def combine_first(self, other: DataFrame) -> DataFrame: 1 0.0 3.0 1.0 2 NaN 3.0 1.0 """ - from pandas.core.computation import expressions + from pandas.core.dtypes.common import ( + is_extension_array_dtype, + is_integer_dtype, + is_unsigned_integer_dtype, + ) def combiner(x: Series, y: Series): - mask = x.isna()._values - - x_values = x._values - y_values = y._values + mask = x.isna() # If the column y in other DataFrame is not in first DataFrame, - # just return y_values. + # just return y. if y.name not in self.columns: - return y_values + return y - return expressions.where(mask, y_values, x_values) + return y.where(mask, x) if len(other) == 0: combined = self.reindex( @@ -9162,6 +9163,34 @@ def combiner(x: Series, y: Series): ) combined = combined.astype(other.dtypes) else: + # GH #60128 + # Promote large 64-bit integers to their nullable types. + # Without this, precision will be lost in a float64 rount-trip. 
+ def _cast_large_numpy_ints_to_nullable(df: DataFrame) -> DataFrame: + BOUND = 2**53 + cast_map: dict[str, str] = {} + for col, dt in df.dtypes.items(): + if is_integer_dtype(dt) and not is_extension_array_dtype(dt): + ser = df[col] + if ser.size == 0: + continue + if is_unsigned_integer_dtype(dt): + if ser.max() >= BOUND: + # promote large uint64 to nullable UInt64 + cast_map[col] = "UInt64" + else: + if ser.max() >= BOUND or ser.min() <= -BOUND: + # promote large int64 to nullable Int64 + cast_map[col] = "Int64" + return df.astype(cast_map) if cast_map else df + + # Only cast frames whose index expand to the union (i.e., get on align) + union_index = self.index.union(other.index) + if not self.index.equals(union_index): + self = _cast_large_numpy_ints_to_nullable(self) + if not other.index.equals(union_index): + other = _cast_large_numpy_ints_to_nullable(other) + combined = self.combine(other, combiner, overwrite=False) dtypes = { From f7867631ab601bd171d7d434bd63f4c4ef8499e7 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Tue, 14 Oct 2025 21:52:24 -0400 Subject: [PATCH 02/22] use difference not union to avoid sorting --- pandas/core/frame.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6b0337d06057a..61e7fe1ce3742 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9185,10 +9185,9 @@ def _cast_large_numpy_ints_to_nullable(df: DataFrame) -> DataFrame: return df.astype(cast_map) if cast_map else df # Only cast frames whose index expand to the union (i.e., get on align) - union_index = self.index.union(other.index) - if not self.index.equals(union_index): + if len(other.index.difference(self.index, sort=False)): self = _cast_large_numpy_ints_to_nullable(self) - if not other.index.equals(union_index): + if len(self.index.difference(other.index, sort=False)): other = _cast_large_numpy_ints_to_nullable(other) combined = 
self.combine(other, combiner, overwrite=False) From ae99b3d56cb3f9d71be5885d9562c0de3ed21c53 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Tue, 14 Oct 2025 22:09:12 -0400 Subject: [PATCH 03/22] fix typo, fewer comments --- pandas/core/frame.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 61e7fe1ce3742..319611ac49d84 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9163,9 +9163,7 @@ def combiner(x: Series, y: Series): ) combined = combined.astype(other.dtypes) else: - # GH #60128 - # Promote large 64-bit integers to their nullable types. - # Without this, precision will be lost in a float64 rount-trip. + # GH#60128 Avoid lossy conversion to float64 def _cast_large_numpy_ints_to_nullable(df: DataFrame) -> DataFrame: BOUND = 2**53 cast_map: dict[str, str] = {} @@ -9184,7 +9182,7 @@ def _cast_large_numpy_ints_to_nullable(df: DataFrame) -> DataFrame: cast_map[col] = "Int64" return df.astype(cast_map) if cast_map else df - # Only cast frames whose index expand to the union (i.e., get <NA> on align) + # Cast any side that will gain rows on outer align (introduces <NA>). 
if len(other.index.difference(self.index, sort=False)): self = _cast_large_numpy_ints_to_nullable(self) if len(self.index.difference(other.index, sort=False)): From 6e77fc9711750a3f19a5d7caf46801b52845d897 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Tue, 14 Oct 2025 23:45:10 -0400 Subject: [PATCH 04/22] refactor --- pandas/core/frame.py | 46 ++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 319611ac49d84..28b775d149de4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9141,21 +9141,13 @@ def combine_first(self, other: DataFrame) -> DataFrame: 1 0.0 3.0 1.0 2 NaN 3.0 1.0 """ - from pandas.core.dtypes.common import ( - is_extension_array_dtype, - is_integer_dtype, - is_unsigned_integer_dtype, - ) def combiner(x: Series, y: Series): - mask = x.isna() - - # If the column y in other DataFrame is not in first DataFrame, - # just return y. - if y.name not in self.columns: - return y - - return y.where(mask, x) + # GH#60128 Preserve EA dtypes by operating at the Series level. + # If 'y' is a new column, return it as-is; otherwise fill in 'x' + # from 'y'. Avoids dropping to NumPy arrays (which would lose + # Int64/UInt64 and reintroduce float64 paths). + return y if y.name not in self.columns else y.where(x.isna(), x) if len(other) == 0: combined = self.reindex( @@ -9163,23 +9155,23 @@ def combiner(x: Series, y: Series): ) combined = combined.astype(other.dtypes) else: - # GH#60128 Avoid lossy conversion to float64 + # GH#60128 Avoid precision loss from int64/uint64 -> float64 round-trip. + # Promote NumPy int64/uint64 to nullable Int64/UInt64 only when values + # exceed float64's exact range (|x| >= 2**53). This keeps alignment that + # introduces from forcing a lossy cast. 
def _cast_large_numpy_ints_to_nullable(df: DataFrame) -> DataFrame: - BOUND = 2**53 + BOUND = 2**53 # first non-exact integer for float64 cast_map: dict[str, str] = {} + for col, dt in df.dtypes.items(): - if is_integer_dtype(dt) and not is_extension_array_dtype(dt): - ser = df[col] - if ser.size == 0: - continue - if is_unsigned_integer_dtype(dt): - if ser.max() >= BOUND: - # promote large uint64 to nullable UInt64 - cast_map[col] = "UInt64" - else: - if ser.max() >= BOUND or ser.min() <= -BOUND: - # promote large int64 to nullable Int64 - cast_map[col] = "Int64" + ser = df[col] + if dt == np.dtype("uint64"): + if ser.size and ser.max() >= BOUND: + cast_map[col] = "UInt64" + elif dt == np.dtype("int64"): + if ser.size and (ser.max() >= BOUND or ser.min() <= -BOUND): + cast_map[col] = "Int64" + return df.astype(cast_map) if cast_map else df # Cast any side that will gain rows on outer align (introduces ). From ea7d2ba88d0f24b94dee46cadfeadfd1e5f23f2d Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Wed, 15 Oct 2025 08:21:43 -0400 Subject: [PATCH 05/22] add news --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/frame.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..71ec23b68fe66 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1145,6 +1145,7 @@ Reshaping ^^^^^^^^^ - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) - Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`) +- Bug in :meth:`DataFrame.combine_first` where large ``int64``/``uint64`` values could lose precision when an outer alignment introduced missing values. 
(:issue:`60128`) - Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`) - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 28b775d149de4..f8ca3111aa819 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9174,7 +9174,7 @@ def _cast_large_numpy_ints_to_nullable(df: DataFrame) -> DataFrame: return df.astype(cast_map) if cast_map else df - # Cast any side that will gain rows on outer align (introduces ). + # Only need to cast sides that gain rows on outer align (introduces ). if len(other.index.difference(self.index, sort=False)): self = _cast_large_numpy_ints_to_nullable(self) if len(self.index.difference(other.index, sort=False)): From 301d85f14bc78b192c85231aa40d1b09f9394bcc Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Wed, 15 Oct 2025 12:51:19 -0400 Subject: [PATCH 06/22] always use nullable Int for 64-bit ints --- pandas/core/frame.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f8ca3111aa819..a2d244fa488f4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9155,30 +9155,23 @@ def combiner(x: Series, y: Series): ) combined = combined.astype(other.dtypes) else: - # GH#60128 Avoid precision loss from int64/uint64 -> float64 round-trip. - # Promote NumPy int64/uint64 to nullable Int64/UInt64 only when values - # exceed float64's exact range (|x| >= 2**53). This keeps alignment that - # introduces from forcing a lossy cast. 
- def _cast_large_numpy_ints_to_nullable(df: DataFrame) -> DataFrame: - BOUND = 2**53 # first non-exact integer for float64 + # GH#60128 Avoid precision loss from int64/uint64 <-> float64 round-trip. + def _cast_64_bit_ints_to_nullable(df: DataFrame) -> DataFrame: cast_map: dict[str, str] = {} for col, dt in df.dtypes.items(): - ser = df[col] if dt == np.dtype("uint64"): - if ser.size and ser.max() >= BOUND: - cast_map[col] = "UInt64" + cast_map[col] = "UInt64" elif dt == np.dtype("int64"): - if ser.size and (ser.max() >= BOUND or ser.min() <= -BOUND): - cast_map[col] = "Int64" + cast_map[col] = "Int64" return df.astype(cast_map) if cast_map else df # Only need to cast sides that gain rows on outer align (introduces <NA>). if len(other.index.difference(self.index, sort=False)): - self = _cast_large_numpy_ints_to_nullable(self) + self = _cast_64_bit_ints_to_nullable(self) if len(self.index.difference(other.index, sort=False)): - other = _cast_large_numpy_ints_to_nullable(other) + other = _cast_64_bit_ints_to_nullable(other) combined = self.combine(other, combiner, overwrite=False) From a6b461c9ef9bf11ea0b18b097b074862b7ba28c8 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Sat, 18 Oct 2025 21:26:51 -0400 Subject: [PATCH 07/22] always upcast, for predictability --- pandas/core/frame.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a2d244fa488f4..3047f0878a9d2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9156,7 +9156,7 @@ def combiner(x: Series, y: Series): combined = combined.astype(other.dtypes) else: # GH#60128 Avoid precision loss from int64/uint64 <-> float64 round-trip. 
- def _cast_64_bit_ints_to_nullable(df: DataFrame) -> DataFrame: + def _promote_ints_to_nullable(df: DataFrame) -> DataFrame: cast_map: dict[str, str] = {} for col, dt in df.dtypes.items(): @@ -9167,11 +9167,8 @@ def _cast_64_bit_ints_to_nullable(df: DataFrame) -> DataFrame: return df.astype(cast_map) if cast_map else df - # Only need to cast sides that gain rows on outer align (introduces ). - if len(other.index.difference(self.index, sort=False)): - self = _cast_64_bit_ints_to_nullable(self) - if len(self.index.difference(other.index, sort=False)): - other = _cast_64_bit_ints_to_nullable(other) + self = _promote_ints_to_nullable(self) + other = _promote_ints_to_nullable(other) combined = self.combine(other, combiner, overwrite=False) From e15bde98d6dee8a595585ad5f568a24f2414b0c6 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Sun, 19 Oct 2025 13:30:29 -0400 Subject: [PATCH 08/22] make wide ints nullable before align and restore after combining --- pandas/core/frame.py | 79 +++++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3047f0878a9d2..58983529ab651 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9025,6 +9025,52 @@ def combine( 1 0.0 3.0 1.0 2 NaN 3.0 1.0 """ + + # GH#62691 Prevent lossy conversion of wide integers + # by proactively promoting them to their nullable versions + # because an outer align will force a round trip through float64. 
+ def _promote_wide_ints(df: DataFrame) -> DataFrame: + """Promotes int64/uint64 columns to their nullable versions.""" + cast_map: dict[str, str] = {} + for col, dt in df.dtypes.items(): + if dt == np.dtype("int64"): + cast_map[col] = "Int64" + elif dt == np.dtype("uint64"): + cast_map[col] = "UInt64" + + if cast_map: + df = df.astype(cast_map) + return df + + # store originals before promotion + self_original = self + other_original = other + self = _promote_wide_ints(self) + other = _promote_wide_ints(other) + + def _restore_wide_ints(df: DataFrame): + """Restores previously int64/uint64 columns if they don't have NAs.""" + cast_map: dict[str, str] = {} + for col in df.columns: + ser = df[col] + orig_dt_self = self_original.dtypes.get(col) + orig_dt_other = other_original.dtypes.get(col) + + is_at_risk = (orig_dt_self in [np.int64, np.uint64]) or ( + orig_dt_other in [np.int64, np.uint64] + ) + + if is_at_risk and not isna(ser).any(): + dtypes_to_resolve = [ + dt for dt in (orig_dt_self, orig_dt_other) if dt is not None + ] + if dtypes_to_resolve: + cast_map[col] = find_common_type(dtypes_to_resolve) + + if cast_map: + df = df.astype(cast_map) + return df + other_idxlen = len(other.index) # save for compare other_columns = other.columns @@ -9092,6 +9138,7 @@ def combine( # convert_objects just in case frame_result = self._constructor(result, index=new_index, columns=new_columns) + frame_result = _restore_wide_ints(frame_result) return frame_result.__finalize__(self, method="combine") def combine_first(self, other: DataFrame) -> DataFrame: @@ -9141,13 +9188,20 @@ def combine_first(self, other: DataFrame) -> DataFrame: 1 0.0 3.0 1.0 2 NaN 3.0 1.0 """ + from pandas.core.computation import expressions def combiner(x: Series, y: Series): - # GH#60128 Preserve EA dtypes by operating at the Series level. - # If 'y' is a new column, return it as-is; otherwise fill in 'x' - # from 'y'. 
Avoids dropping to NumPy arrays (which would lose - # Int64/UInt64 and reintroduce float64 paths). - return y if y.name not in self.columns else y.where(x.isna(), x) + mask = x.isna()._values + + x_values = x._values + y_values = y._values + + # If the column y in other DataFrame is not in first DataFrame, + # just return y_values. + if y.name not in self.columns: + return y_values + + return expressions.where(mask, y_values, x_values) if len(other) == 0: combined = self.reindex( @@ -9155,21 +9209,6 @@ def combiner(x: Series, y: Series): ) combined = combined.astype(other.dtypes) else: - # GH#60128 Avoid precision loss from int64/uint64 <-> float64 round-trip. - def _promote_ints_to_nullable(df: DataFrame) -> DataFrame: - cast_map: dict[str, str] = {} - - for col, dt in df.dtypes.items(): - if dt == np.dtype("uint64"): - cast_map[col] = "UInt64" - elif dt == np.dtype("int64"): - cast_map[col] = "Int64" - - return df.astype(cast_map) if cast_map else df - - self = _promote_ints_to_nullable(self) - other = _promote_ints_to_nullable(other) - combined = self.combine(other, combiner, overwrite=False) dtypes = { From 451621b3a64949ffc113f70ddf6414c62e57f1a6 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Sun, 19 Oct 2025 13:36:12 -0400 Subject: [PATCH 09/22] update test expectations (don't convert to float64 when Int64 or UInt64 does the trick) --- pandas/tests/frame/methods/test_combine_first.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 1e594043510ea..9729e3e434580 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -475,6 +475,7 @@ def test_combine_first_with_nan_multiindex(): "d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan], }, index=mi_expected, + dtype="Int64", ) tm.assert_frame_equal(res, expected) @@ 
-521,6 +522,7 @@ def test_combine_first_duplicates_rows_for_nan_index_values(): index=MultiIndex.from_arrays( [[1, 2, 3, 4], [np.nan, 5, 6, 7]], names=["a", "b"] ), + dtype="Int64", ) combined = df1.combine_first(df2) tm.assert_frame_equal(combined, expected) From 4fdc45911e4ef42f5280a7f17edad36325aef30d Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Sun, 19 Oct 2025 13:51:22 -0400 Subject: [PATCH 10/22] add type hint --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 58983529ab651..a7a4269b527b3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9048,7 +9048,7 @@ def _promote_wide_ints(df: DataFrame) -> DataFrame: self = _promote_wide_ints(self) other = _promote_wide_ints(other) - def _restore_wide_ints(df: DataFrame): + def _restore_wide_ints(df: DataFrame) -> DataFrame: """Restores previously int64/uint64 columns if they don't have NAs.""" cast_map: dict[str, str] = {} for col in df.columns: From ef662a0df87a4405bf4f93cc62d130454d3a0f74 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Sun, 19 Oct 2025 16:36:25 -0400 Subject: [PATCH 11/22] small refactor --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/frame.py | 28 ++++++++++--------- .../tests/frame/methods/test_combine_first.py | 2 +- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 71ec23b68fe66..e36e807b48d2d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1145,7 +1145,7 @@ Reshaping ^^^^^^^^^ - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) - Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`) -- Bug in :meth:`DataFrame.combine_first` where large ``int64``/``uint64`` values could 
lose precision when an outer alignment introduced missing values. (:issue:`60128`) +- Bug in :meth:`DataFrame.combine_first` where very large integers could lose precision after the operation. (:issue:`60128`) - Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`) - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a7a4269b527b3..3a7f25971d966 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9026,7 +9026,7 @@ def combine( 2 NaN 3.0 1.0 """ - # GH#62691 Prevent lossy conversion of wide integers + # GH#60128 Prevent lossy conversion of wide integers # by proactively promoting them to their nullable versions # because an outer align will force a round trip through float64. 
def _promote_wide_ints(df: DataFrame) -> DataFrame: @@ -9042,17 +9042,13 @@ def _promote_wide_ints(df: DataFrame) -> DataFrame: df = df.astype(cast_map) return df - # store originals before promotion - self_original = self - other_original = other - self = _promote_wide_ints(self) - other = _promote_wide_ints(other) - - def _restore_wide_ints(df: DataFrame) -> DataFrame: + def _restore_wide_ints( + self_original: DataFrame, other_original: DataFrame, combined_df: DataFrame + ) -> DataFrame: """Restores previously int64/uint64 columns if they don't have NAs.""" cast_map: dict[str, str] = {} - for col in df.columns: - ser = df[col] + for col in combined_df.columns: + ser = combined_df[col] orig_dt_self = self_original.dtypes.get(col) orig_dt_other = other_original.dtypes.get(col) @@ -9068,8 +9064,14 @@ def _restore_wide_ints(df: DataFrame) -> DataFrame: cast_map[col] = find_common_type(dtypes_to_resolve) if cast_map: - df = df.astype(cast_map) - return df + combined_df = combined_df.astype(cast_map) + return combined_df + + # store originals and promote wide ints before align + self_original = self + other_original = other + self = _promote_wide_ints(self) + other = _promote_wide_ints(other) other_idxlen = len(other.index) # save for compare other_columns = other.columns @@ -9138,7 +9140,7 @@ def _restore_wide_ints(df: DataFrame) -> DataFrame: # convert_objects just in case frame_result = self._constructor(result, index=new_index, columns=new_columns) - frame_result = _restore_wide_ints(frame_result) + frame_result = _restore_wide_ints(self_original, other_original, frame_result) return frame_result.__finalize__(self, method="combine") def combine_first(self, other: DataFrame) -> DataFrame: diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 9729e3e434580..ac0005a689445 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -472,7 +472,7 
@@ def test_combine_first_with_nan_multiindex(): expected = DataFrame( { "c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1], - "d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan], + "d": [1, 4, np.nan, 2, 5, np.nan, np.nan, 3, np.nan, 6, np.nan], }, index=mi_expected, dtype="Int64", From fefadcbd3410f0dcb6911b3716a8efb3d727e010 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Sun, 19 Oct 2025 17:10:39 -0400 Subject: [PATCH 12/22] combine_first's combiner must preserve EA dtypes --- pandas/core/frame.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3a7f25971d966..ab7caa50cb19f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9026,9 +9026,7 @@ def combine( 2 NaN 3.0 1.0 """ - # GH#60128 Prevent lossy conversion of wide integers - # by proactively promoting them to their nullable versions - # because an outer align will force a round trip through float64. + # GH#60128 Prevent lossy conversion of wide integers to float64. def _promote_wide_ints(df: DataFrame) -> DataFrame: """Promotes int64/uint64 columns to their nullable versions.""" cast_map: dict[str, str] = {} @@ -9190,20 +9188,10 @@ def combine_first(self, other: DataFrame) -> DataFrame: 1 0.0 3.0 1.0 2 NaN 3.0 1.0 """ - from pandas.core.computation import expressions def combiner(x: Series, y: Series): - mask = x.isna()._values - - x_values = x._values - y_values = y._values - - # If the column y in other DataFrame is not in first DataFrame, - # just return y_values. 
- if y.name not in self.columns: - return y_values - - return expressions.where(mask, y_values, x_values) + # GH#60128 The combiner must preserve EA dtypes + return y if y.name not in self.columns else y.where(x.isna(), x) if len(other) == 0: combined = self.reindex( From f80917d2d0a7bf35c74514400935f1e0c24ccb73 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Sun, 19 Oct 2025 17:46:13 -0400 Subject: [PATCH 13/22] clearer comments --- pandas/core/frame.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ab7caa50cb19f..3c07f6746c9be 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9026,7 +9026,9 @@ def combine( 2 NaN 3.0 1.0 """ - # GH#60128 Prevent lossy conversion of wide integers to float64. + # GH#60128 Integers n where |n| > 2**53 would lose precision after align + # upcasts them to float. Avoid lossy conversion by preemptively promoting + # int64 and uint64 Dtypes to their nullable EA Dtypes, Int64 and UInt64. def _promote_wide_ints(df: DataFrame) -> DataFrame: """Promotes int64/uint64 columns to their nullable versions.""" cast_map: dict[str, str] = {} @@ -9059,6 +9061,7 @@ def _restore_wide_ints( dt for dt in (orig_dt_self, orig_dt_other) if dt is not None ] if dtypes_to_resolve: + # if we had different dtypes, possibly promote cast_map[col] = find_common_type(dtypes_to_resolve) if cast_map: @@ -9190,7 +9193,7 @@ def combine_first(self, other: DataFrame) -> DataFrame: """ def combiner(x: Series, y: Series): - # GH#60128 The combiner must preserve EA dtypes + # GH#60128 The combiner is supposed to preserve EA Dtypes. 
return y if y.name not in self.columns else y.where(x.isna(), x) if len(other) == 0: From bf69fad22569131adef8980175725ec0861a3626 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Sun, 19 Oct 2025 19:12:08 -0400 Subject: [PATCH 14/22] create new test for issue --- pandas/core/frame.py | 6 +++--- pandas/tests/frame/methods/test_combine_first.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3c07f6746c9be..876c9a763e831 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9052,16 +9052,16 @@ def _restore_wide_ints( orig_dt_self = self_original.dtypes.get(col) orig_dt_other = other_original.dtypes.get(col) - is_at_risk = (orig_dt_self in [np.int64, np.uint64]) or ( + was_promoted = (orig_dt_self in [np.int64, np.uint64]) or ( orig_dt_other in [np.int64, np.uint64] ) - if is_at_risk and not isna(ser).any(): + if was_promoted and not isna(ser).any(): dtypes_to_resolve = [ dt for dt in (orig_dt_self, orig_dt_other) if dt is not None ] if dtypes_to_resolve: - # if we had different dtypes, possibly promote + # if we had different dtypes, reconcile cast_map[col] = find_common_type(dtypes_to_resolve) if cast_map: diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index ac0005a689445..17370532df0c0 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -398,6 +398,21 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype): ).set_index(["a", "b"]) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "wide_val, dtype, EAdtype", + ( + (1666880195890293744, "uint64", "UInt64"), + (-1666880195890293744, "int64", "Int64"), + ), + ) + def test_combine_first_preserve_precision(self, wide_val, dtype, EAdtype): + # GH#60128 + df1 = DataFrame({"A": [wide_val, 5]}, dtype=dtype) 
+ df2 = DataFrame({"A": [6, 7, wide_val]}, dtype=dtype) + result = df1.combine_first(df2) + expected = DataFrame({"A": [wide_val, 5, wide_val]}, dtype=dtype) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "scalar1, scalar2", From 016c64e43f2495e94dc078e3fa27cbdad9a259e6 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Sun, 19 Oct 2025 19:32:37 -0400 Subject: [PATCH 15/22] clean up test --- pandas/tests/frame/methods/test_combine_first.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 17370532df0c0..a17f4be1d5aa5 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -399,13 +399,13 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( - "wide_val, dtype, EAdtype", + "wide_val, dtype", ( - (1666880195890293744, "uint64", "UInt64"), - (-1666880195890293744, "int64", "Int64"), + (1666880195890293744, "uint64"), + (-1666880195890293744, "int64"), ), ) - def test_combine_first_preserve_precision(self, wide_val, dtype, EAdtype): + def test_combine_first_preserve_precision(self, wide_val, dtype): # GH#60128 df1 = DataFrame({"A": [wide_val, 5]}, dtype=dtype) df2 = DataFrame({"A": [6, 7, wide_val]}, dtype=dtype) From 2928ceefa144bd69f5a12b69ea4f48146c205f00 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Mon, 20 Oct 2025 08:31:21 -0400 Subject: [PATCH 16/22] don't break any other tests, but comment why it may be worth it --- pandas/core/frame.py | 15 ++++++++++++--- pandas/tests/frame/methods/test_combine_first.py | 2 -- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 876c9a763e831..cccab6f05a058 
100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9056,13 +9056,22 @@ def _restore_wide_ints( orig_dt_other in [np.int64, np.uint64] ) - if was_promoted and not isna(ser).any(): + if was_promoted: dtypes_to_resolve = [ dt for dt in (orig_dt_self, orig_dt_other) if dt is not None ] if dtypes_to_resolve: - # if we had different dtypes, reconcile - cast_map[col] = find_common_type(dtypes_to_resolve) + if isna(ser).any(): + # Currently, align upcasts to float64 when NAs are present. + # Do this so we don't have to modify any tests that expect + # float dtype when NAs are present. BUT we could consider + # embracing nullable integer dtype since large integers are + # still losing information on conversion to float -- it's + # just not obvious because they aren't cast back to int + # when NAs are present. + dtypes_to_resolve.append(np.dtype("float64")) + target_type = find_common_type(dtypes_to_resolve) + cast_map[col] = target_type if cast_map: combined_df = combined_df.astype(cast_map) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index a17f4be1d5aa5..ddedcece798c1 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -490,7 +490,6 @@ def test_combine_first_with_nan_multiindex(): "d": [1, 4, np.nan, 2, 5, np.nan, np.nan, 3, np.nan, 6, np.nan], }, index=mi_expected, - dtype="Int64", ) tm.assert_frame_equal(res, expected) @@ -537,7 +536,6 @@ def test_combine_first_duplicates_rows_for_nan_index_values(): index=MultiIndex.from_arrays( [[1, 2, 3, 4], [np.nan, 5, 6, 7]], names=["a", "b"] ), - dtype="Int64", ) combined = df1.combine_first(df2) tm.assert_frame_equal(combined, expected) From 1a53d485acf1267015e7cf2896a85998f8ead55e Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Mon, 20 Oct 2025 08:32:37 -0400 Subject: [PATCH 17/22] preserve old test --- 
pandas/tests/frame/methods/test_combine_first.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index ddedcece798c1..a837f66dc8762 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -487,7 +487,7 @@ def test_combine_first_with_nan_multiindex(): expected = DataFrame( { "c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1], - "d": [1, 4, np.nan, 2, 5, np.nan, np.nan, 3, np.nan, 6, np.nan], + "d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan], }, index=mi_expected, ) From 7e6837f46d4633c0f2dedf2c5ddc8f5777fde91b Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Mon, 20 Oct 2025 08:36:28 -0400 Subject: [PATCH 18/22] thinner comment --- pandas/core/frame.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cccab6f05a058..3001cc4edf274 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9062,13 +9062,14 @@ def _restore_wide_ints( ] if dtypes_to_resolve: if isna(ser).any(): - # Currently, align upcasts to float64 when NAs are present. - # Do this so we don't have to modify any tests that expect - # float dtype when NAs are present. BUT we could consider - # embracing nullable integer dtype since large integers are - # still losing information on conversion to float -- it's - # just not obvious because they aren't cast back to int - # when NAs are present. + # Currently, align upcasts to float64 when NAs are + # present. Do this so we don't have to modify any + # tests that expect float dtype when NAs are + # present. 
BUT we could consider embracing nullable + # integer dtypes since large integers are still + # losing information on conversion to float -- it's + # just not obvious because they aren't cast back to + # int when NAs are present. dtypes_to_resolve.append(np.dtype("float64")) target_type = find_common_type(dtypes_to_resolve) cast_map[col] = target_type From 444deaa5d5dbb811aec9a2115c35150ff10be07f Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Mon, 20 Oct 2025 22:45:26 -0400 Subject: [PATCH 19/22] clearer comments --- pandas/core/frame.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3001cc4edf274..27dfcac1a9bb0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9028,9 +9028,9 @@ def combine( # GH#60128 Integers n where |n| > 2**53 would lose precision after align # upcasts them to float. Avoid lossy conversion by preemptively promoting - # int64 and uint64 Dtypes to their nullable EA Dtypes, Int64 and UInt64. + # int64 and uint64 to their nullable ExtensionDtypes, Int64 and UInt64. def _promote_wide_ints(df: DataFrame) -> DataFrame: - """Promotes int64/uint64 columns to their nullable versions.""" + """Promotes int64/uint64 columns to their nullable ExtensionDtypes.""" cast_map: dict[str, str] = {} for col, dt in df.dtypes.items(): if dt == np.dtype("int64"): @@ -9042,10 +9042,13 @@ def _promote_wide_ints(df: DataFrame) -> DataFrame: df = df.astype(cast_map) return df + # To maintain backwards compatibility, this function can restore + # int64/uint64 columns from float64 when possible. But we should + # really consider just embracing nullable ExtensionDtypes instead. 
def _restore_wide_ints( self_original: DataFrame, other_original: DataFrame, combined_df: DataFrame ) -> DataFrame: - """Restores previously int64/uint64 columns if they don't have NAs.""" + """Restores original dtypes by re-casting the promoted int columns.""" cast_map: dict[str, str] = {} for col in combined_df.columns: ser = combined_df[col] @@ -9062,14 +9065,13 @@ def _restore_wide_ints( ] if dtypes_to_resolve: if isna(ser).any(): - # Currently, align upcasts to float64 when NAs are - # present. Do this so we don't have to modify any - # tests that expect float dtype when NAs are - # present. BUT we could consider embracing nullable - # integer dtypes since large integers are still - # losing information on conversion to float -- it's - # just not obvious because they aren't cast back to - # int when NAs are present. + # If there are NAs, we can't safely downcast back + # to int. Previously, we left the data as float64. + # However, converting large integers to float can + # lose precision, even if it's not immediately + # obvious (since we don't cast back). Consider + # embracing nullable ExtensionDtypes instead + # and dropping this whole restoration step. dtypes_to_resolve.append(np.dtype("float64")) target_type = find_common_type(dtypes_to_resolve) cast_map[col] = target_type From 49ff1a5be0338217c96e81f46494b6eb670efd65 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Tue, 21 Oct 2025 21:55:38 -0400 Subject: [PATCH 20/22] follow contributing guidelines --- pandas/core/frame.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 27dfcac1a9bb0..690bf043ac318 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9029,31 +9029,31 @@ def combine( # GH#60128 Integers n where |n| > 2**53 would lose precision after align # upcasts them to float. 
Avoid lossy conversion by preemptively promoting # int64 and uint64 to their nullable ExtensionDtypes, Int64 and UInt64. - def _promote_wide_ints(df: DataFrame) -> DataFrame: - """Promotes int64/uint64 columns to their nullable ExtensionDtypes.""" + def _ensure_nullable_int64_dtypes(df: DataFrame) -> DataFrame: + """Promote int64/uint64 DataFrame columns to Int64/UInt64.""" cast_map: dict[str, str] = {} for col, dt in df.dtypes.items(): - if dt == np.dtype("int64"): + if dt == np.int64: cast_map[col] = "Int64" - elif dt == np.dtype("uint64"): + elif dt == np.uint64: cast_map[col] = "UInt64" if cast_map: df = df.astype(cast_map) return df - # To maintain backwards compatibility, this function can restore - # int64/uint64 columns from float64 when possible. But we should - # really consider just embracing nullable ExtensionDtypes instead. - def _restore_wide_ints( - self_original: DataFrame, other_original: DataFrame, combined_df: DataFrame + # To maintain backwards compatibility, downcast the pre-promoted int64 + # columns of the combined DataFrame back to how they would have resolved. + # Consider just embracing nullable ExtensionDtypes instead, though. 
+ def _revert_int64_dtype_promotion( + self_orig: DataFrame, other_orig: DataFrame, combined_df: DataFrame ) -> DataFrame: - """Restores original dtypes by re-casting the promoted int columns.""" + """Resolve the combined dtypes according to the original dtypes.""" cast_map: dict[str, str] = {} for col in combined_df.columns: ser = combined_df[col] - orig_dt_self = self_original.dtypes.get(col) - orig_dt_other = other_original.dtypes.get(col) + orig_dt_self = self_orig.dtypes.get(col) + orig_dt_other = other_orig.dtypes.get(col) was_promoted = (orig_dt_self in [np.int64, np.uint64]) or ( orig_dt_other in [np.int64, np.uint64] @@ -9080,11 +9080,11 @@ def _restore_wide_ints( combined_df = combined_df.astype(cast_map) return combined_df - # store originals and promote wide ints before align - self_original = self - other_original = other - self = _promote_wide_ints(self) - other = _promote_wide_ints(other) + # store originals and prepare for align + self_orig = self + other_orig = other + self = _ensure_nullable_int64_dtypes(self) + other = _ensure_nullable_int64_dtypes(other) other_idxlen = len(other.index) # save for compare other_columns = other.columns @@ -9153,7 +9153,9 @@ def _restore_wide_ints( # convert_objects just in case frame_result = self._constructor(result, index=new_index, columns=new_columns) - frame_result = _restore_wide_ints(self_original, other_original, frame_result) + frame_result = _revert_int64_dtype_promotion( + self_orig, other_orig, frame_result + ) return frame_result.__finalize__(self, method="combine") def combine_first(self, other: DataFrame) -> DataFrame: From 747e8bcdcb9d0d4de440df499996bcaaed667289 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Tue, 21 Oct 2025 22:07:43 -0400 Subject: [PATCH 21/22] move news from reshaping to numeric --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst 
b/doc/source/whatsnew/v3.0.0.rst index e36e807b48d2d..b7256023bc66c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -992,6 +992,7 @@ Numeric ^^^^^^^ - Bug in :func:`api.types.infer_dtype` returning "mixed" for complex and ``pd.NA`` mix (:issue:`61976`) - Bug in :func:`api.types.infer_dtype` returning "mixed-integer-float" for float and ``pd.NA`` mix (:issue:`61621`) +- Bug in :meth:`DataFrame.combine` and :meth:`DataFrame.combine_first` where integers with absolute value greater than ``2**53`` could lose precision after the operation (:issue:`60128`) - Bug in :meth:`DataFrame.corr` where numerical precision errors resulted in correlations above ``1.0`` (:issue:`61120`) - Bug in :meth:`DataFrame.cov` raises a ``TypeError`` instead of returning potentially incorrect results or other errors (:issue:`53115`) - Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`) @@ -1145,7 +1146,6 @@ Reshaping ^^^^^^^^^ - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) - Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`) -- Bug in :meth:`DataFrame.combine_first` where very large integers could lose precision after the operation. (:issue:`60128`) - Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`) - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``.
(:issue:`58721`) From c527bc0e9640b36c5b1c4c140c16c203f483e5b6 Mon Sep 17 00:00:00 2001 From: Angela Liss <59097311+angela-tarantula@users.noreply.github.com> Date: Tue, 21 Oct 2025 22:47:58 -0400 Subject: [PATCH 22/22] use correct typing --- pandas/core/frame.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 690bf043ac318..b64bc5d786ed3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -144,6 +144,10 @@ PeriodArray, TimedeltaArray, ) +from pandas.core.arrays.integer import ( + Int64Dtype, + UInt64Dtype, +) from pandas.core.arrays.sparse import SparseFrameAccessor from pandas.core.arrays.string_ import StringDtype from pandas.core.construction import ( @@ -9031,12 +9035,12 @@ def combine( # int64 and uint64 to their nullable ExtensionDtypes, Int64 and UInt64. def _ensure_nullable_int64_dtypes(df: DataFrame) -> DataFrame: """Promote int64/uint64 DataFrame columns to Int64/UInt64.""" - cast_map: dict[str, str] = {} + cast_map: dict[IndexLabel, DtypeObj] = {} for col, dt in df.dtypes.items(): if dt == np.int64: - cast_map[col] = "Int64" + cast_map[col] = Int64Dtype() elif dt == np.uint64: - cast_map[col] = "UInt64" + cast_map[col] = UInt64Dtype() if cast_map: df = df.astype(cast_map) @@ -9049,7 +9053,7 @@ def _revert_int64_dtype_promotion( self_orig: DataFrame, other_orig: DataFrame, combined_df: DataFrame ) -> DataFrame: """Resolve the combined dtypes according to the original dtypes.""" - cast_map: dict[str, str] = {} + cast_map: dict[IndexLabel, DtypeObj] = {} for col in combined_df.columns: ser = combined_df[col] orig_dt_self = self_orig.dtypes.get(col) @@ -9072,7 +9076,7 @@ def _revert_int64_dtype_promotion( # obvious (since we don't cast back). Consider # embracing nullable ExtensionDtypes instead # and dropping this whole restoration step. 
- dtypes_to_resolve.append(np.dtype("float64")) + dtypes_to_resolve.append(np.dtype(np.float64)) target_type = find_common_type(dtypes_to_resolve) cast_map[col] = target_type