12
12
Series ,
13
13
_testing as tm ,
14
14
)
15
+ from pandas .tests .strings import (
16
+ _convert_na_value ,
17
+ object_pyarrow_numpy ,
18
+ )
15
19
16
20
17
21
@pytest .mark .parametrize ("method" , ["split" , "rsplit" ])
@@ -20,9 +24,7 @@ def test_split(any_string_dtype, method):
20
24
21
25
result = getattr (values .str , method )("_" )
22
26
exp = Series ([["a" , "b" , "c" ], ["c" , "d" , "e" ], np .nan , ["f" , "g" , "h" ]])
23
- if values .dtype != object :
24
- # GH#18463
25
- exp = exp .fillna (pd .NA )
27
+ exp = _convert_na_value (values , exp )
26
28
tm .assert_series_equal (result , exp )
27
29
28
30
@@ -32,9 +34,7 @@ def test_split_more_than_one_char(any_string_dtype, method):
32
34
values = Series (["a__b__c" , "c__d__e" , np .nan , "f__g__h" ], dtype = any_string_dtype )
33
35
result = getattr (values .str , method )("__" )
34
36
exp = Series ([["a" , "b" , "c" ], ["c" , "d" , "e" ], np .nan , ["f" , "g" , "h" ]])
35
- if values .dtype != object :
36
- # GH#18463
37
- exp = exp .fillna (pd .NA )
37
+ exp = _convert_na_value (values , exp )
38
38
tm .assert_series_equal (result , exp )
39
39
40
40
result = getattr (values .str , method )("__" , expand = False )
@@ -46,9 +46,7 @@ def test_split_more_regex_split(any_string_dtype):
46
46
values = Series (["a,b_c" , "c_d,e" , np .nan , "f,g,h" ], dtype = any_string_dtype )
47
47
result = values .str .split ("[,_]" )
48
48
exp = Series ([["a" , "b" , "c" ], ["c" , "d" , "e" ], np .nan , ["f" , "g" , "h" ]])
49
- if values .dtype != object :
50
- # GH#18463
51
- exp = exp .fillna (pd .NA )
49
+ exp = _convert_na_value (values , exp )
52
50
tm .assert_series_equal (result , exp )
53
51
54
52
@@ -118,8 +116,8 @@ def test_split_object_mixed(expand, method):
118
116
def test_split_n (any_string_dtype , method , n ):
119
117
s = Series (["a b" , pd .NA , "b c" ], dtype = any_string_dtype )
120
118
expected = Series ([["a" , "b" ], pd .NA , ["b" , "c" ]])
121
-
122
119
result = getattr (s .str , method )(" " , n = n )
120
+ expected = _convert_na_value (s , expected )
123
121
tm .assert_series_equal (result , expected )
124
122
125
123
@@ -128,9 +126,7 @@ def test_rsplit(any_string_dtype):
128
126
values = Series (["a,b_c" , "c_d,e" , np .nan , "f,g,h" ], dtype = any_string_dtype )
129
127
result = values .str .rsplit ("[,_]" )
130
128
exp = Series ([["a,b_c" ], ["c_d,e" ], np .nan , ["f,g,h" ]])
131
- if values .dtype != object :
132
- # GH#18463
133
- exp = exp .fillna (pd .NA )
129
+ exp = _convert_na_value (values , exp )
134
130
tm .assert_series_equal (result , exp )
135
131
136
132
@@ -139,9 +135,7 @@ def test_rsplit_max_number(any_string_dtype):
139
135
values = Series (["a_b_c" , "c_d_e" , np .nan , "f_g_h" ], dtype = any_string_dtype )
140
136
result = values .str .rsplit ("_" , n = 1 )
141
137
exp = Series ([["a_b" , "c" ], ["c_d" , "e" ], np .nan , ["f_g" , "h" ]])
142
- if values .dtype != object :
143
- # GH#18463
144
- exp = exp .fillna (pd .NA )
138
+ exp = _convert_na_value (values , exp )
145
139
tm .assert_series_equal (result , exp )
146
140
147
141
@@ -390,7 +384,7 @@ def test_split_nan_expand(any_string_dtype):
390
384
# check that these are actually np.nan/pd.NA and not None
391
385
# TODO see GH 18463
392
386
# tm.assert_frame_equal does not differentiate
393
- if any_string_dtype == "object" :
387
+ if any_string_dtype in object_pyarrow_numpy :
394
388
assert all (np .isnan (x ) for x in result .iloc [1 ])
395
389
else :
396
390
assert all (x is pd .NA for x in result .iloc [1 ])
@@ -455,9 +449,7 @@ def test_partition_series_more_than_one_char(method, exp, any_string_dtype):
455
449
s = Series (["a__b__c" , "c__d__e" , np .nan , "f__g__h" , None ], dtype = any_string_dtype )
456
450
result = getattr (s .str , method )("__" , expand = False )
457
451
expected = Series (exp )
458
- if s .dtype != object :
459
- # GH#18463
460
- expected = expected .fillna (pd .NA )
452
+ expected = _convert_na_value (s , expected )
461
453
tm .assert_series_equal (result , expected )
462
454
463
455
@@ -480,9 +472,7 @@ def test_partition_series_none(any_string_dtype, method, exp):
480
472
s = Series (["a b c" , "c d e" , np .nan , "f g h" , None ], dtype = any_string_dtype )
481
473
result = getattr (s .str , method )(expand = False )
482
474
expected = Series (exp )
483
- if s .dtype != object :
484
- # GH#18463
485
- expected = expected .fillna (pd .NA )
475
+ expected = _convert_na_value (s , expected )
486
476
tm .assert_series_equal (result , expected )
487
477
488
478
@@ -505,9 +495,7 @@ def test_partition_series_not_split(any_string_dtype, method, exp):
505
495
s = Series (["abc" , "cde" , np .nan , "fgh" , None ], dtype = any_string_dtype )
506
496
result = getattr (s .str , method )("_" , expand = False )
507
497
expected = Series (exp )
508
- if s .dtype != object :
509
- # GH#18463
510
- expected = expected .fillna (pd .NA )
498
+ expected = _convert_na_value (s , expected )
511
499
tm .assert_series_equal (result , expected )
512
500
513
501
@@ -531,9 +519,7 @@ def test_partition_series_unicode(any_string_dtype, method, exp):
531
519
532
520
result = getattr (s .str , method )("_" , expand = False )
533
521
expected = Series (exp )
534
- if s .dtype != object :
535
- # GH#18463
536
- expected = expected .fillna (pd .NA )
522
+ expected = _convert_na_value (s , expected )
537
523
tm .assert_series_equal (result , expected )
538
524
539
525
0 commit comments