1
1
import numpy as np
2
2
import pytest
3
3
4
- from pandas ._config import using_string_dtype
5
-
6
4
from pandas .compat import HAS_PYARROW
7
5
from pandas .errors import SettingWithCopyWarning
8
6
@@ -953,15 +951,19 @@ def test_head_tail(method, using_copy_on_write, warn_copy_on_write):
953
951
tm .assert_frame_equal (df , df_orig )
954
952
955
953
956
- @pytest .mark .xfail (using_string_dtype () and HAS_PYARROW , reason = "TODO(infer_string)" )
957
- def test_infer_objects (using_copy_on_write ):
958
- df = DataFrame ({"a" : [1 , 2 ], "b" : "c" , "c" : 1 , "d" : "x" })
954
+ def test_infer_objects (using_copy_on_write , using_infer_string ):
955
+ df = DataFrame (
956
+ {"a" : [1 , 2 ], "b" : Series (["x" , "y" ], dtype = object ), "c" : 1 , "d" : "x" }
957
+ )
959
958
df_orig = df .copy ()
960
959
df2 = df .infer_objects ()
961
960
962
961
if using_copy_on_write :
963
962
assert np .shares_memory (get_array (df2 , "a" ), get_array (df , "a" ))
964
- assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
963
+ if using_infer_string :
964
+ assert not tm .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
965
+ else :
966
+ assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
965
967
966
968
else :
967
969
assert not np .shares_memory (get_array (df2 , "a" ), get_array (df , "a" ))
@@ -975,19 +977,16 @@ def test_infer_objects(using_copy_on_write):
975
977
tm .assert_frame_equal (df , df_orig )
976
978
977
979
978
- @pytest .mark .xfail (
979
- using_string_dtype () and not HAS_PYARROW , reason = "TODO(infer_string)"
980
- )
981
- def test_infer_objects_no_reference (using_copy_on_write ):
980
+ def test_infer_objects_no_reference (using_copy_on_write , using_infer_string ):
982
981
df = DataFrame (
983
982
{
984
983
"a" : [1 , 2 ],
985
- "b" : "c" ,
984
+ "b" : Series ([ "x" , "y" ], dtype = object ) ,
986
985
"c" : 1 ,
987
986
"d" : Series (
988
987
[Timestamp ("2019-12-31" ), Timestamp ("2020-12-31" )], dtype = "object"
989
988
),
990
- "e" : "b" ,
989
+ "e" : Series ([ "z" , "w" ], dtype = object ) ,
991
990
}
992
991
)
993
992
df = df .infer_objects ()
@@ -1001,16 +1000,22 @@ def test_infer_objects_no_reference(using_copy_on_write):
1001
1000
df .iloc [0 , 3 ] = Timestamp ("2018-12-31" )
1002
1001
if using_copy_on_write :
1003
1002
assert np .shares_memory (arr_a , get_array (df , "a" ))
1004
- # TODO(CoW): Block splitting causes references here
1005
- assert not np .shares_memory (arr_b , get_array (df , "b" ))
1003
+ if using_infer_string :
1004
+ # note that the underlying memory of arr_b has been copied anyway
1005
+ # because of the assignment, but the EA is updated inplace so still
1006
+ # appears to share memory
1007
+ assert tm .shares_memory (arr_b , get_array (df , "b" ))
1008
+ else :
1009
+ # TODO(CoW): Block splitting causes references here
1010
+ assert not np .shares_memory (arr_b , get_array (df , "b" ))
1006
1011
assert np .shares_memory (arr_d , get_array (df , "d" ))
1007
1012
1008
1013
1009
- def test_infer_objects_reference (using_copy_on_write ):
1014
+ def test_infer_objects_reference (using_copy_on_write , using_infer_string ):
1010
1015
df = DataFrame (
1011
1016
{
1012
1017
"a" : [1 , 2 ],
1013
- "b" : "c" ,
1018
+ "b" : Series ([ "x" , "y" ], dtype = object ) ,
1014
1019
"c" : 1 ,
1015
1020
"d" : Series (
1016
1021
[Timestamp ("2019-12-31" ), Timestamp ("2020-12-31" )], dtype = "object"
@@ -1029,7 +1034,8 @@ def test_infer_objects_reference(using_copy_on_write):
1029
1034
df .iloc [0 , 3 ] = Timestamp ("2018-12-31" )
1030
1035
if using_copy_on_write :
1031
1036
assert not np .shares_memory (arr_a , get_array (df , "a" ))
1032
- assert not np .shares_memory (arr_b , get_array (df , "b" ))
1037
+ if not using_infer_string or HAS_PYARROW :
1038
+ assert not np .shares_memory (arr_b , get_array (df , "b" ))
1033
1039
assert np .shares_memory (arr_d , get_array (df , "d" ))
1034
1040
1035
1041
@@ -1184,15 +1190,14 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs, warn_copy_on_writ
1184
1190
assert np .shares_memory (get_array (obj , "a" ), get_array (view , "a" ))
1185
1191
1186
1192
1187
- @pytest .mark .xfail (using_string_dtype () and HAS_PYARROW , reason = "TODO(infer_string)" )
1188
1193
@pytest .mark .parametrize ("decimals" , [- 1 , 0 , 1 ])
1189
1194
def test_round (using_copy_on_write , warn_copy_on_write , decimals ):
1190
1195
df = DataFrame ({"a" : [1 , 2 ], "b" : "c" })
1191
1196
df_orig = df .copy ()
1192
1197
df2 = df .round (decimals = decimals )
1193
1198
1194
1199
if using_copy_on_write :
1195
- assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
1200
+ assert tm .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
1196
1201
# TODO: Make inplace by using out parameter of ndarray.round?
1197
1202
if decimals >= 0 :
1198
1203
# Ensure lazy copy if no-op
0 commit comments