diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 1d8d0f6a74cb1..a7f63d75a047e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -758,6 +758,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) +- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`) - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index af66bb54610f1..3669d8249dd27 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8671,6 +8671,7 @@ def combine( 2 NaN 3.0 1.0 """ other_idxlen = len(other.index) # save for compare + other_columns = other.columns this, other = self.align(other) new_index = this.index @@ -8681,8 +8682,8 @@ def combine( if self.empty and len(other) == other_idxlen: return other.copy() - # sorts if possible; otherwise align above ensures that these are set-equal - new_columns = this.columns.union(other.columns) + # preserve column order: self's columns first, then columns found only in other (captured before align rebinds it); no sorting + new_columns = self.columns.union(other_columns, sort=False) do_fill = fill_value is not None result = {} for col in new_columns: diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index a70876b5a96ca..1e594043510ea 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -380,7 +380,7 @@ def test_combine_first_with_asymmetric_other(self, val): df2 = DataFrame({"isBool": [True]}) res = 
df1.combine_first(df2) - exp = DataFrame({"isBool": [True], "isNum": [val]}) + exp = DataFrame({"isNum": [val], "isBool": [True]}) tm.assert_frame_equal(res, exp) @@ -555,3 +555,13 @@ def test_combine_first_empty_columns(): result = left.combine_first(right) expected = DataFrame(columns=["a", "b", "c"]) tm.assert_frame_equal(result, expected) + + +def test_combine_first_preserve_column_order(): + # GH#60427: result must keep df1's column order ("B" before "A") rather than sorting + df1 = DataFrame({"B": [1, 2, 3], "A": [4, None, 6]}) + df2 = DataFrame({"A": [5]}, index=[1]) + + result = df1.combine_first(df2) + expected = DataFrame({"B": [1, 2, 3], "A": [4.0, 5.0, 6.0]}) + tm.assert_frame_equal(result, expected)