Skip to content

Commit

Permalink
BUG: fix combine_first reorders columns (#60791)
Browse files Browse the repository at this point in the history
* Add test

* Fix combine_first reordering columns

* Add whatsnew

* Fix corner case when self is empty and future.infer_string is True

* Update
  • Loading branch information
yuanx749 authored Jan 27, 2025
1 parent 84bf1ef commit e36b000
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,7 @@ Groupby/resample/rolling
Reshaping
^^^^^^^^^
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8671,6 +8671,7 @@ def combine(
2 NaN 3.0 1.0
"""
other_idxlen = len(other.index) # save for compare
other_columns = other.columns

this, other = self.align(other)
new_index = this.index
Expand All @@ -8681,8 +8682,8 @@ def combine(
if self.empty and len(other) == other_idxlen:
return other.copy()

# sorts if possible; otherwise align above ensures that these are set-equal
new_columns = this.columns.union(other.columns)
# preserve column order
new_columns = self.columns.union(other_columns, sort=False)
do_fill = fill_value is not None
result = {}
for col in new_columns:
Expand Down
12 changes: 11 additions & 1 deletion pandas/tests/frame/methods/test_combine_first.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ def test_combine_first_with_asymmetric_other(self, val):
df2 = DataFrame({"isBool": [True]})

res = df1.combine_first(df2)
exp = DataFrame({"isBool": [True], "isNum": [val]})
exp = DataFrame({"isNum": [val], "isBool": [True]})

tm.assert_frame_equal(res, exp)

Expand Down Expand Up @@ -555,3 +555,13 @@ def test_combine_first_empty_columns():
result = left.combine_first(right)
expected = DataFrame(columns=["a", "b", "c"])
tm.assert_frame_equal(result, expected)


def test_combine_first_preserve_column_order():
    """Check that ``combine_first`` keeps the calling frame's column order.

    ``df1`` lists its columns as ``B`` then ``A``; the combined result is
    expected to come back in that same order.
    """
    # GH#60427
    # "A" has a missing value at index 1, which is the only row df2 supplies.
    df1 = DataFrame({"B": [1, 2, 3], "A": [4, None, 6]})
    df2 = DataFrame({"A": [5]}, index=[1])

    result = df1.combine_first(df2)
    # Columns stay in df1's order (B, A); the gap in "A" is filled with 5.
    expected = DataFrame({"B": [1, 2, 3], "A": [4.0, 5.0, 6.0]})
    tm.assert_frame_equal(result, expected)

0 comments on commit e36b000

Please sign in to comment.