Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8458,6 +8458,28 @@ def _arith_method_with_reindex(self, right: DataFrame, op) -> DataFrame:
# to avoid constructing two potentially large/sparse DataFrames
join_columns = left.columns.join(right.columns, how="outer")

# GH#63288 Preserve ExtensionDtype (e.g. pyarrow) when reindexing
# introduces missing columns
from pandas.core.dtypes.base import ExtensionDtype

missing_cols = [c for c in join_columns if c not in result.columns]

if missing_cols:
for col in missing_cols:
src = left[col] if col in left.columns else right[col]

if isinstance(src.dtype, ExtensionDtype):
# Create NA-filled Series with same ExtensionDtype
fill = src.iloc[:0].reindex(result.index)
else:
# Fallback to existing NumPy behavior
fill = self._constructor_sliced(
[np.nan] * len(result.index),
index=result.index,
)

result[col] = fill

if result.columns.has_duplicates:
# Avoid reindexing with a duplicate axis.
# https://github.com/pandas-dev/pandas/issues/35194
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2201,3 +2201,28 @@ def test_mixed_col_index_dtype(string_dtype_no_object):
expected.columns = expected.columns.astype(string_dtype_no_object)

tm.assert_frame_equal(result, expected)


def test_arith_reindex_with_pyarrow_dtype():
    # GH#63288 - columns that exist in only one operand are introduced by
    # the reindexing step; they must keep the pyarrow-backed dtype
    pytest.importorskip("pyarrow")

    pa_int = "int64[pyarrow]"

    base = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
    # left operand: columns a, b / all three rows
    left = base.iloc[:, :2].astype(pa_int)
    # right operand: columns b, c / last two rows only
    right = base.iloc[1:, 1:].astype(pa_int)

    result = left + right

    # Only "b" is shared, and only rows 1 and 2 overlap; everything else
    # is missing after alignment.
    all_na = [pd.NA, pd.NA, pd.NA]
    expected = DataFrame(
        {
            "a": pd.array(all_na, dtype=pa_int),
            "b": pd.array([pd.NA, 10, 12], dtype=pa_int),
            "c": pd.array(all_na, dtype=pa_int),
        }
    )
    tm.assert_frame_equal(result, expected)

    # Explicitly confirm that every column is still an ExtensionDtype
    for col in ("a", "b", "c"):
        assert str(result[col].dtype) == pa_int
Loading