diff --git a/pandas/conftest.py b/pandas/conftest.py
index 35a0d3b89400f..168ef5fed8fae 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -2022,6 +2022,14 @@ def warn_copy_on_write() -> bool:
     )
 
 
+@pytest.fixture
+def using_infer_string() -> bool:
+    """
+    Fixture to check if infer_string is enabled.
+    """
+    return pd.options.future.infer_string
+
+
 warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
 if zoneinfo is not None:
     warsaws.append(zoneinfo.ZoneInfo("Europe/Warsaw"))  # type: ignore[arg-type]
diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py
index 53ac7fbf40af1..618f69eb744e3 100644
--- a/pandas/tests/series/accessors/test_dt_accessor.py
+++ b/pandas/tests/series/accessors/test_dt_accessor.py
@@ -586,13 +586,15 @@ def test_strftime_dt64_days(self):
         # dtype may be S10 or U10 depending on python version
         tm.assert_index_equal(result, expected)
 
-    def test_strftime_period_days(self):
+    def test_strftime_period_days(self, using_infer_string):
         period_index = period_range("20150301", periods=5)
         result = period_index.strftime("%Y/%m/%d")
         expected = Index(
             ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
             dtype="=U10",
         )
+        if using_infer_string:
+            expected = expected.astype("string[pyarrow_numpy]")
         tm.assert_index_equal(result, expected)
 
     def test_strftime_dt64_microsecond_resolution(self):
diff --git a/pandas/tests/series/indexing/test_delitem.py b/pandas/tests/series/indexing/test_delitem.py
index af6b3910baec0..3d1082c3d040b 100644
--- a/pandas/tests/series/indexing/test_delitem.py
+++ b/pandas/tests/series/indexing/test_delitem.py
@@ -31,19 +31,16 @@ def test_delitem(self):
         del s[0]
         tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
 
-    def test_delitem_object_index(self):
+    def test_delitem_object_index(self, using_infer_string):
         # Index(dtype=object)
-        s = Series(1, index=["a"])
+        dtype = "string[pyarrow_numpy]" if using_infer_string else object
+        s = Series(1, index=Index(["a"], dtype=dtype))
         del s["a"]
-        tm.assert_series_equal(
-            s, Series(dtype="int64", index=Index([], dtype="object"))
-        )
+        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
         s["a"] = 1
-        tm.assert_series_equal(s, Series(1, index=["a"]))
+        tm.assert_series_equal(s, Series(1, index=Index(["a"], dtype=dtype)))
         del s["a"]
-        tm.assert_series_equal(
-            s, Series(dtype="int64", index=Index([], dtype="object"))
-        )
+        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
 
     def test_delitem_missing_key(self):
         # empty
diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py
index 479e74703bc0e..596a225c288b8 100644
--- a/pandas/tests/series/indexing/test_getitem.py
+++ b/pandas/tests/series/indexing/test_getitem.py
@@ -71,7 +71,7 @@ def test_getitem_unrecognized_scalar(self):
     def test_getitem_negative_out_of_bounds(self):
         ser = Series(["a"] * 10, index=["a"] * 10)
 
-        msg = "index -11 is out of bounds for axis 0 with size 10"
+        msg = "index -11 is out of bounds for axis 0 with size 10|index out of bounds"
         warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
         with pytest.raises(IndexError, match=msg):
             with tm.assert_produces_warning(FutureWarning, match=warn_msg):
@@ -363,7 +363,9 @@ def test_getitem_no_matches(self, box):
         key = Series(["C"], dtype=object)
         key = box(key)
 
-        msg = r"None of \[Index\(\['C'\], dtype='object'\)\] are in the \[index\]"
+        # Group the alternation so the whole message is matched for either dtype
+        dtypes = "(object|string)"
+        msg = rf"None of \[Index\(\['C'\], dtype='{dtypes}'\)\] are in the \[index\]"
         with pytest.raises(KeyError, match=msg):
             ser[key]
 
@@ -437,7 +439,7 @@ def test_getitem_boolean_empty(self):
 
         # GH#5877
         # indexing with empty series
-        ser = Series(["A", "B"])
+        ser = Series(["A", "B"], dtype=object)
         expected = Series(dtype=object, index=Index([], dtype="int64"))
         result = ser[Series([], dtype=object)]
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
index 16c127e6ece7b..02d51d5119469 100644
--- a/pandas/tests/series/indexing/test_setitem.py
+++ b/pandas/tests/series/indexing/test_setitem.py
@@ -2,6 +2,7 @@
     date,
     datetime,
 )
+from decimal import Decimal
 
 import numpy as np
 import pytest
@@ -175,7 +176,8 @@ class TestSetitemScalarIndexer:
     def test_setitem_negative_out_of_bounds(self):
         ser = Series(["a"] * 10, index=["a"] * 10)
 
-        msg = "index -11 is out of bounds for axis 0 with size 10"
+        # string index falls back to positional
+        msg = "index (-11|-1) is out of bounds for axis 0 with size 10"
         warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
         with pytest.raises(IndexError, match=msg):
             with tm.assert_produces_warning(FutureWarning, match=warn_msg):
@@ -527,8 +529,12 @@ def test_setitem_empty_series_timestamp_preserves_dtype(self):
             Timedelta("9 days").to_pytimedelta(),
         ],
     )
-    def test_append_timedelta_does_not_cast(self, td):
+    def test_append_timedelta_does_not_cast(self, td, using_infer_string, request):
         # GH#22717 inserting a Timedelta should _not_ cast to int64
+        if using_infer_string and not isinstance(td, Timedelta):
+            # TODO: GH#56010
+            request.applymarker(pytest.mark.xfail(reason="inferred as string"))
+
         expected = Series(["x", td], index=[0, "td"], dtype=object)
 
         ser = Series(["x"])
@@ -595,13 +601,21 @@ def test_setitem_enlarge_with_na(
         expected = Series(expected_values, dtype=target_dtype)
         tm.assert_series_equal(ser, expected)
 
-    def test_setitem_enlargement_object_none(self, nulls_fixture):
+    def test_setitem_enlargement_object_none(self, nulls_fixture, using_infer_string):
         # GH#48665
         ser = Series(["a", "b"])
         ser[3] = nulls_fixture
-        expected = Series(["a", "b", nulls_fixture], index=[0, 1, 3])
+        dtype = (
+            "string[pyarrow_numpy]"
+            if using_infer_string and not isinstance(nulls_fixture, Decimal)
+            else object
+        )
+        expected = Series(["a", "b", nulls_fixture], index=[0, 1, 3], dtype=dtype)
         tm.assert_series_equal(ser, expected)
-        assert ser[3] is nulls_fixture
+        if dtype is object:
+            assert ser[3] is nulls_fixture
+        else:
+            assert ser[3] is np.nan  # string dtype normalises every null to NaN
 
 
 def test_setitem_scalar_into_readonly_backing_data():
@@ -845,20 +859,28 @@ def test_series_where(self, obj, key, expected, warn, val, is_inplace):
 
         self._check_inplace(is_inplace, orig, arr, obj)
 
-    def test_index_where(self, obj, key, expected, warn, val):
+    def test_index_where(self, obj, key, expected, warn, val, using_infer_string):
         mask = np.zeros(obj.shape, dtype=bool)
         mask[key] = True
 
-        res = Index(obj).where(~mask, val)
-        expected_idx = Index(expected, dtype=expected.dtype)
-        tm.assert_index_equal(res, expected_idx)
+        if using_infer_string and obj.dtype == object:
+            with pytest.raises(TypeError, match="Scalar must"):
+                Index(obj).where(~mask, val)
+        else:
+            res = Index(obj).where(~mask, val)
+            expected_idx = Index(expected, dtype=expected.dtype)
+            tm.assert_index_equal(res, expected_idx)
 
-    def test_index_putmask(self, obj, key, expected, warn, val):
+    def test_index_putmask(self, obj, key, expected, warn, val, using_infer_string):
         mask = np.zeros(obj.shape, dtype=bool)
         mask[key] = True
 
-        res = Index(obj).putmask(mask, val)
-        tm.assert_index_equal(res, Index(expected, dtype=expected.dtype))
+        if using_infer_string and obj.dtype == object:
+            with pytest.raises(TypeError, match="Scalar must"):
+                Index(obj).putmask(mask, val)
+        else:
+            res = Index(obj).putmask(mask, val)
+            tm.assert_index_equal(res, Index(expected, dtype=expected.dtype))
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py
index 7c1507ce423ad..c978481ca9988 100644
--- a/pandas/tests/series/indexing/test_where.py
+++ b/pandas/tests/series/indexing/test_where.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 from pandas.core.dtypes.common import is_integer
 
 import pandas as pd
@@ -230,6 +232,7 @@ def test_where_ndframe_align():
     tm.assert_series_equal(out, expected)
 
 
+@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set ints into string")
 def test_where_setitem_invalid():
     # GH 2702
     # make sure correct exceptions are raised on invalid list assignment
diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
index 7d4ad99deab38..122e1927bebe9 100644
--- a/pandas/tests/series/methods/test_astype.py
+++ b/pandas/tests/series/methods/test_astype.py
@@ -75,7 +75,7 @@ def test_astype_dict_like(self, dtype_class):
 
         dt1 = dtype_class({"abc": str})
         result = ser.astype(dt1)
-        expected = Series(["0", "2", "4", "6", "8"], name="abc")
+        expected = Series(["0", "2", "4", "6", "8"], name="abc", dtype=object)
         tm.assert_series_equal(result, expected)
 
         dt2 = dtype_class({"abc": "float64"})
@@ -163,10 +163,12 @@ def test_astype_empty_constructor_equality(self, dtype):
             Series([string.digits * 10, rand_str(63), rand_str(64), np.nan, 1.0]),
         ],
     )
-    def test_astype_str_map(self, dtype, series):
+    def test_astype_str_map(self, dtype, series, using_infer_string):
         # see GH#4405
         result = series.astype(dtype)
         expected = series.map(str)
+        if using_infer_string:
+            expected = expected.astype(object)
         tm.assert_series_equal(result, expected)
 
     def test_astype_float_to_period(self):
@@ -276,13 +278,13 @@ def test_astype_str_cast_dt64(self):
         ts = Series([Timestamp("2010-01-04 00:00:00")])
         res = ts.astype(str)
 
-        expected = Series(["2010-01-04"])
+        expected = Series(["2010-01-04"], dtype=object)
         tm.assert_series_equal(res, expected)
 
         ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
         res = ts.astype(str)
 
-        expected = Series(["2010-01-04 00:00:00-05:00"])
+        expected = Series(["2010-01-04 00:00:00-05:00"], dtype=object)
         tm.assert_series_equal(res, expected)
 
     def test_astype_str_cast_td64(self):
@@ -291,7 +293,7 @@ def test_astype_str_cast_td64(self):
         td = Series([Timedelta(1, unit="d")])
         ser = td.astype(str)
 
-        expected = Series(["1 days"])
+        expected = Series(["1 days"], dtype=object)
         tm.assert_series_equal(ser, expected)
 
     def test_dt64_series_astype_object(self):
@@ -338,7 +340,7 @@ def test_astype_from_float_to_str(self, dtype):
         # https://github.com/pandas-dev/pandas/issues/36451
         ser = Series([0.1], dtype=dtype)
         result = ser.astype(str)
-        expected = Series(["0.1"])
+        expected = Series(["0.1"], dtype=object)
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -409,7 +411,7 @@ def test_astype_cast_object_int(self):
 
         tm.assert_series_equal(result, Series(np.arange(1, 5)))
 
-    def test_astype_unicode(self):
+    def test_astype_unicode(self, using_infer_string):
         # see GH#7758: A bit of magic is required to set
         # default encoding to utf-8
         digits = string.digits
@@ -426,12 +428,14 @@ def test_astype_unicode(self):
             item = "野菜食べないとやばい"
             ser = Series([item.encode()])
             result = ser.astype(np.str_)
-            expected = Series([item])
+            expected = Series([item], dtype=object)
             tm.assert_series_equal(result, expected)
 
         for ser in test_series:
             res = ser.astype(np.str_)
             expec = ser.map(str)
+            if using_infer_string:
+                expec = expec.astype(object)
             tm.assert_series_equal(res, expec)
 
         # Restore the former encoding
@@ -527,12 +531,12 @@ def test_astype_categorical_to_other(self):
         expected = ser
         tm.assert_series_equal(ser.astype("category"), expected)
         tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
-        msg = r"Cannot cast object dtype to float64"
+        msg = r"Cannot cast (object|string) dtype to float64"
         with pytest.raises(ValueError, match=msg):
             ser.astype("float64")
 
         cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
-        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
+        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype=object)
         tm.assert_series_equal(cat.astype("str"), exp)
         s2 = Series(Categorical(["1", "2", "3", "4"]))
         exp2 = Series([1, 2, 3, 4]).astype("int")
diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py
index 47659308cfcad..795b2eab82aca 100644
--- a/pandas/tests/series/methods/test_combine_first.py
+++ b/pandas/tests/series/methods/test_combine_first.py
@@ -53,7 +53,7 @@ def test_combine_first(self):
         # mixed types
         index = tm.makeStringIndex(20)
         floats = Series(np.random.default_rng(2).standard_normal(20), index=index)
-        strings = Series(tm.makeStringIndex(10), index=index[::2])
+        strings = Series(tm.makeStringIndex(10), index=index[::2], dtype=object)
 
         combined = strings.combine_first(floats)
 
diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
index 0cd39140938c4..2b1bb4c31125f 100644
--- a/pandas/tests/series/methods/test_convert_dtypes.py
+++ b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -186,6 +186,7 @@ def test_convert_dtypes(
         self,
         test_cases,
         params,
+        using_infer_string,
     ):
         data, maindtype, expected_default, expected_other = test_cases
         if (
@@ -219,6 +220,16 @@ def test_convert_dtypes(
         for spec, dtype in expected_other.items():
             if all(params_dict[key] is val for key, val in zip(spec[::2], spec[1::2])):
                 expected_dtype = dtype
+        if (
+            using_infer_string
+            and expected_default == "string"
+            and expected_dtype == object
+            and params[0]
+            and not params[1]
+        ):
+            # Object data that convert_dtypes would turn into "string" is instead
+            # inferred as string by infer_objects when infer_string is enabled
+            expected_dtype = "string[pyarrow_numpy]"
 
         expected = pd.Series(data, dtype=expected_dtype)
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py
index 6d78ecd61cdcb..f86f6069a2ef3 100644
--- a/pandas/tests/series/methods/test_map.py
+++ b/pandas/tests/series/methods/test_map.py
@@ -83,7 +83,7 @@ def func(x):
     tm.assert_series_equal(result, expected)
 
 
-def test_map_series_stringdtype(any_string_dtype):
+def test_map_series_stringdtype(any_string_dtype, using_infer_string):
     # map test on StringDType, GH#40823
     ser1 = Series(
         data=["cat", "dog", "rabbit"],
@@ -98,6 +98,8 @@ def test_map_series_stringdtype(any_string_dtype):
         item = np.nan
 
     expected = Series(data=["rabbit", "dog", "cat", item], dtype=any_string_dtype)
+    if using_infer_string and any_string_dtype == "object":
+        expected = expected.astype("string[pyarrow_numpy]")
 
     tm.assert_series_equal(result, expected)
 
@@ -106,7 +108,7 @@ def test_map_series_stringdtype(any_string_dtype):
     "data, expected_dtype",
     [(["1-1", "1-1", np.nan], "category"), (["1-1", "1-2", np.nan], object)],
 )
-def test_map_categorical_with_nan_values(data, expected_dtype):
+def test_map_categorical_with_nan_values(data, expected_dtype, using_infer_string):
     # GH 20714 bug fixed in: GH 24275
     def func(val):
         return val.split("-")[0]
@@ -114,6 +116,8 @@ def func(val):
     s = Series(data, dtype="category")
 
     result = s.map(func, na_action="ignore")
+    if using_infer_string and expected_dtype == object:
+        expected_dtype = "string[pyarrow_numpy]"
     expected = Series(["1", "1", np.nan], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -133,11 +137,15 @@ def test_map_empty_integer_series_with_datetime_index():
 
 
 @pytest.mark.parametrize("func", [str, lambda x: str(x)])
-def test_map_simple_str_callables_same_as_astype(string_series, func):
+def test_map_simple_str_callables_same_as_astype(
+    string_series, func, using_infer_string
+):
     # test that we are evaluating row-by-row first
     # before vectorized evaluation
     result = string_series.map(func)
-    expected = string_series.astype(str)
+    expected = string_series.astype(
+        str if not using_infer_string else "string[pyarrow_numpy]"
+    )
     tm.assert_series_equal(result, expected)
 
 
@@ -461,7 +469,7 @@ def test_map_box_period():
 
 
 @pytest.mark.parametrize("na_action", [None, "ignore"])
-def test_map_categorical(na_action):
+def test_map_categorical(na_action, using_infer_string):
     values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
     s = Series(values, name="XX", index=list("abcdefg"))
 
@@ -474,7 +482,7 @@ def test_map_categorical(na_action):
     result = s.map(lambda x: "A", na_action=na_action)
     exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
     tm.assert_series_equal(result, exp)
-    assert result.dtype == object
+    assert result.dtype == (object if not using_infer_string else "string")
 
 
 @pytest.mark.parametrize(
@@ -536,12 +544,14 @@ def f(x):
         (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
     ],
 )
-def test_map_missing_mixed(vals, mapping, exp):
+def test_map_missing_mixed(vals, mapping, exp, using_infer_string):
     # GH20495
     s = Series(vals + [np.nan])
     result = s.map(mapping)
-
-    tm.assert_series_equal(result, Series(exp))
+    exp = Series(exp)
+    if using_infer_string and mapping == {np.nan: "not NaN"}:
+        exp.iloc[-1] = np.nan
+    tm.assert_series_equal(result, exp)
 
 
 def test_map_scalar_on_date_time_index_aware_series():
diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py
index 0923a2d42ce10..6f0c8d751a92a 100644
--- a/pandas/tests/series/methods/test_reindex.py
+++ b/pandas/tests/series/methods/test_reindex.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 import pandas.util._test_decorators as td
 
 from pandas import (
@@ -22,6 +24,9 @@
 import pandas._testing as tm
 
 
+@pytest.mark.xfail(
+    using_pyarrow_string_dtype(), reason="share memory doesn't work for arrow"
+)
 def test_reindex(datetime_series, string_series):
     identity = string_series.reindex(string_series.index)
 
diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py
index 83adff08b758e..119654bd19b3f 100644
--- a/pandas/tests/series/methods/test_rename.py
+++ b/pandas/tests/series/methods/test_rename.py
@@ -8,6 +8,7 @@
     Index,
     MultiIndex,
     Series,
+    array,
 )
 import pandas._testing as tm
 
@@ -45,22 +46,28 @@ def test_rename_by_series(self):
         expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo")
         tm.assert_series_equal(result, expected)
 
-    def test_rename_set_name(self):
+    def test_rename_set_name(self, using_infer_string):
         ser = Series(range(4), index=list("abcd"))
         for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
             result = ser.rename(name)
             assert result.name == name
-            tm.assert_numpy_array_equal(result.index.values, ser.index.values)
+            if using_infer_string:
+                tm.assert_extension_array_equal(result.index.values, ser.index.values)
+            else:
+                tm.assert_numpy_array_equal(result.index.values, ser.index.values)
             assert ser.name is None
 
-    def test_rename_set_name_inplace(self):
+    def test_rename_set_name_inplace(self, using_infer_string):
         ser = Series(range(3), index=list("abc"))
         for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
             ser.rename(name, inplace=True)
             assert ser.name == name
-
             exp = np.array(["a", "b", "c"], dtype=np.object_)
-            tm.assert_numpy_array_equal(ser.index.values, exp)
+            if using_infer_string:
+                exp = array(exp, dtype="string[pyarrow_numpy]")
+                tm.assert_extension_array_equal(ser.index.values, exp)
+            else:
+                tm.assert_numpy_array_equal(ser.index.values, exp)
 
     def test_rename_axis_supported(self):
         # Supporting axis for compatibility, detailed in GH-18589
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index f08966c3816c0..fe0f79b766f72 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 import pandas as pd
 import pandas._testing as tm
 from pandas.core.arrays import IntervalArray
@@ -389,6 +391,7 @@ def test_replace_mixed_types_with_string(self):
         expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
         tm.assert_series_equal(expected, result)
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 0 in string")
     @pytest.mark.parametrize(
         "categorical, numeric",
         [
@@ -719,6 +722,7 @@ def test_replace_nullable_numeric(self):
         with pytest.raises(TypeError, match="Invalid value"):
             ints.replace(1, 9.5)
 
+    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 1 in string")
     @pytest.mark.parametrize("regex", [False, True])
     def test_replace_regex_dtype_series(self, regex):
         # GH-48644
@@ -748,10 +752,12 @@ def test_replace_value_none_dtype_numeric(self, val):
         expected = pd.Series([1, None], dtype=object)
         tm.assert_series_equal(result, expected)
 
-    def test_replace_change_dtype_series(self):
+    def test_replace_change_dtype_series(self, using_infer_string):
         # GH#25797
         df = pd.DataFrame.from_dict({"Test": ["0.5", True, "0.6"]})
-        df["Test"] = df["Test"].replace([True], [np.nan])
+        warn = FutureWarning if using_infer_string else None
+        with tm.assert_produces_warning(warn, match="Downcasting"):
+            df["Test"] = df["Test"].replace([True], [np.nan])
         expected = pd.DataFrame.from_dict({"Test": ["0.5", np.nan, "0.6"]})
         tm.assert_frame_equal(df, expected)
 
diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py
index db36221d8f510..bb5ad8cc5a25a 100644
--- a/pandas/tests/series/methods/test_reset_index.py
+++ b/pandas/tests/series/methods/test_reset_index.py
@@ -166,12 +166,20 @@ def test_reset_index_inplace_and_drop_ignore_name(self):
         ),
     ],
 )
-def test_reset_index_dtypes_on_empty_series_with_multiindex(array, dtype):
+def test_reset_index_dtypes_on_empty_series_with_multiindex(
+    array, dtype, using_infer_string
+):
     # GH 19602 - Preserve dtype on empty Series with MultiIndex
     idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
     result = Series(dtype=object, index=idx)[:0].reset_index().dtypes
+    exp = "string" if using_infer_string else object
     expected = Series(
-        {"level_0": np.int64, "level_1": np.float64, "level_2": dtype, 0: object}
+        {
+            "level_0": np.int64,
+            "level_1": np.float64,
+            "level_2": exp if dtype == object else dtype,
+            0: object,
+        }
     )
     tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py
index 76ca05a60eb7a..1c17013d621c7 100644
--- a/pandas/tests/series/methods/test_to_csv.py
+++ b/pandas/tests/series/methods/test_to_csv.py
@@ -165,7 +165,7 @@ def test_to_csv_compression(self, s, encoding, compression):
                     pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"),
                 )
 
-    def test_to_csv_interval_index(self):
+    def test_to_csv_interval_index(self, using_infer_string):
         # GH 28210
         s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3))
 
@@ -175,6 +175,8 @@ def test_to_csv_interval_index(self):
 
             # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
             expected = s.copy()
-            expected.index = expected.index.astype(str)
-
+            if using_infer_string:
+                expected.index = expected.index.astype("string[pyarrow_numpy]")
+            else:
+                expected.index = expected.index.astype(str)
             tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py
index c38b2400f0f4e..3745c045078b7 100644
--- a/pandas/tests/series/methods/test_update.py
+++ b/pandas/tests/series/methods/test_update.py
@@ -38,6 +38,7 @@ def test_update(self, using_copy_on_write):
             expected = DataFrame(
                 [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
             )
+            expected["c"] = expected["c"].astype(object)
         tm.assert_frame_equal(df, expected)
 
     @pytest.mark.parametrize(
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
index b835be6d8e501..e7233f005e427 100644
--- a/pandas/tests/series/test_arithmetic.py
+++ b/pandas/tests/series/test_arithmetic.py
@@ -204,9 +204,9 @@ def test_series_integer_mod(self, index):
         s1 = Series(range(1, 10))
         s2 = Series("foo", index=index)
 
-        msg = "not all arguments converted during string formatting"
+        msg = "not all arguments converted during string formatting|mod not"
 
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises((TypeError, NotImplementedError), match=msg):
             s2 % s1
 
     def test_add_with_duplicate_index(self):
@@ -491,14 +491,27 @@ def test_ser_cmp_result_names(self, names, comparison_op):
             result = op(ser, cidx)
             assert result.name == names[2]
 
-    def test_comparisons(self):
+    def test_comparisons(self, using_infer_string):
         s = Series(["a", "b", "c"])
         s2 = Series([False, True, False])
 
         # it works!
         exp = Series([False, False, False])
-        tm.assert_series_equal(s == s2, exp)
-        tm.assert_series_equal(s2 == s, exp)
+        if using_infer_string:
+            import pyarrow as pa
+
+            msg = "has no kernel"
+            # TODO(3.0) GH56008
+            with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
+                s == s2
+            with tm.assert_produces_warning(
+                DeprecationWarning, match="comparison", check_stacklevel=False
+            ):
+                with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
+                    s2 == s
+        else:
+            tm.assert_series_equal(s == s2, exp)
+            tm.assert_series_equal(s2 == s, exp)
 
     # -----------------------------------------------------------------
     # Categorical Dtype Comparisons
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 195c50969ffae..dd503827ba812 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -152,7 +152,7 @@ def test_scalar_extension_dtype(self, ea_scalar_and_dtype):
         assert ser.dtype == ea_dtype
         tm.assert_series_equal(ser, expected)
 
-    def test_constructor(self, datetime_series):
+    def test_constructor(self, datetime_series, using_infer_string):
         empty_series = Series()
         assert datetime_series.index._is_all_dates
 
@@ -166,7 +166,7 @@ def test_constructor(self, datetime_series):
 
         # Mixed type Series
         mixed = Series(["hello", np.nan], index=[0, 1])
-        assert mixed.dtype == np.object_
+        assert mixed.dtype == (np.object_ if not using_infer_string else "string")
         assert np.isnan(mixed[1])
 
         assert not empty_series.index._is_all_dates
@@ -197,7 +197,7 @@ def test_constructor_index_ndim_gt_1_raises(self):
             Series([1, 3, 2], index=df)
 
     @pytest.mark.parametrize("input_class", [list, dict, OrderedDict])
-    def test_constructor_empty(self, input_class):
+    def test_constructor_empty(self, input_class, using_infer_string):
         empty = Series()
         empty2 = Series(input_class())
 
@@ -228,7 +228,10 @@ def test_constructor_empty(self, input_class):
 
             # GH 19853 : with empty string, index and dtype str
             empty = Series("", dtype=str, index=range(3))
-            empty2 = Series("", index=range(3))
+            if using_infer_string:
+                empty2 = Series("", index=range(3), dtype=object)
+            else:
+                empty2 = Series("", index=range(3))
             tm.assert_series_equal(empty, empty2)
 
     @pytest.mark.parametrize("input_arg", [np.nan, float("nan")])
@@ -1439,7 +1442,7 @@ def test_constructor_dict_of_tuples(self):
 
     # https://github.com/pandas-dev/pandas/issues/22698
     @pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning")
-    def test_fromDict(self):
+    def test_fromDict(self, using_infer_string):
         data = {"a": 0, "b": 1, "c": 2, "d": 3}
 
         series = Series(data)
@@ -1451,19 +1454,19 @@ def test_fromDict(self):
 
         data = {"a": 0, "b": "1", "c": "2", "d": "3"}
         series = Series(data)
-        assert series.dtype == np.object_
+        assert series.dtype == np.object_  # mixed int/str data stays object
 
         data = {"a": "0", "b": "1"}
         series = Series(data, dtype=float)
         assert series.dtype == np.float64
 
-    def test_fromValue(self, datetime_series):
+    def test_fromValue(self, datetime_series, using_infer_string):
         nans = Series(np.nan, index=datetime_series.index, dtype=np.float64)
         assert nans.dtype == np.float64
         assert len(nans) == len(datetime_series)
 
         strings = Series("foo", index=datetime_series.index)
-        assert strings.dtype == np.object_
+        assert strings.dtype == (np.object_ if not using_infer_string else "string")
         assert len(strings) == len(datetime_series)
 
         d = datetime.now()
diff --git a/pandas/tests/series/test_formats.py b/pandas/tests/series/test_formats.py
index 25b34351627a1..040b1186980b2 100644
--- a/pandas/tests/series/test_formats.py
+++ b/pandas/tests/series/test_formats.py
@@ -6,6 +6,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_string_dtype
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -142,6 +144,9 @@ def test_tidy_repr_name_0(self, arg):
         rep_str = repr(ser)
         assert "Name: 0" in rep_str
 
+    @pytest.mark.xfail(
+        using_pyarrow_string_dtype(), reason="TODO: investigate why this is failing"
+    )
     def test_newline(self):
         ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])
         assert "\t" not in repr(ser)
@@ -301,7 +306,7 @@ def __repr__(self) -> str:
         repr(ser)
         str(ser)
 
-    def test_categorical_repr(self):
+    def test_categorical_repr(self, using_infer_string):
         a = Series(Categorical([1, 2, 3, 4]))
         exp = (
             "0    1\n1    2\n2    3\n3    4\n"
@@ -311,22 +316,38 @@ def test_categorical_repr(self):
         assert exp == a.__str__()
 
         a = Series(Categorical(["a", "b"] * 25))
-        exp = (
-            "0     a\n1     b\n"
-            "     ..\n"
-            "48    a\n49    b\n"
-            "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
-        )
+        if using_infer_string:
+            exp = (
+                "0     a\n1     b\n"
+                "     ..\n"
+                "48    a\n49    b\n"
+                "Length: 50, dtype: category\nCategories (2, string): [a, b]"
+            )
+        else:
+            exp = (
+                "0     a\n1     b\n"
+                "     ..\n"
+                "48    a\n49    b\n"
+                "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
+            )
         with option_context("display.max_rows", 5):
             assert exp == repr(a)
 
         levs = list("abcdefghijklmnopqrstuvwxyz")
         a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
-        exp = (
-            "0    a\n1    b\n"
-            "dtype: category\n"
-            "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... 'w' < 'x' < 'y' < 'z']"
-        )
+        if using_infer_string:
+            exp = (
+                "0    a\n1    b\n"
+                "dtype: category\n"
+                "Categories (26, string): [a < b < c < d ... w < x < y < z]"
+            )
+        else:
+            exp = (
+                "0    a\n1    b\n"
+                "dtype: category\n"
+                "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
+                "'w' < 'x' < 'y' < 'z']"
+            )
         assert exp == a.__str__()
 
     def test_categorical_series_repr(self):
diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py
index 2146e154dc7fa..166f52181fed4 100644
--- a/pandas/tests/series/test_logical_ops.py
+++ b/pandas/tests/series/test_logical_ops.py
@@ -146,7 +146,7 @@ def test_logical_operators_int_dtype_with_bool(self):
         expected = Series([False, True, True, True])
         tm.assert_series_equal(result, expected)
 
-    def test_logical_operators_int_dtype_with_object(self):
+    def test_logical_operators_int_dtype_with_object(self, using_infer_string):
         # GH#9016: support bitwise op for integer types
         s_0123 = Series(range(4), dtype="int64")
 
@@ -155,8 +155,14 @@ def test_logical_operators_int_dtype_with_object(self):
         tm.assert_series_equal(result, expected)
 
         s_abNd = Series(["a", "b", np.nan, "d"])
-        with pytest.raises(TypeError, match="unsupported.* 'int' and 'str'"):
-            s_0123 & s_abNd
+        if using_infer_string:
+            import pyarrow as pa
+
+            with pytest.raises(pa.lib.ArrowNotImplementedError, match="has no kernel"):
+                s_0123 & s_abNd
+        else:
+            with pytest.raises(TypeError, match="unsupported.* 'int' and 'str'"):
+                s_0123 & s_abNd
 
     def test_logical_operators_bool_dtype_with_int(self):
         index = list("bca")
@@ -354,7 +360,7 @@ def test_reverse_ops_with_index(self, op, expected):
         result = op(ser, idx)
         tm.assert_series_equal(result, expected)
 
-    def test_logical_ops_label_based(self):
+    def test_logical_ops_label_based(self, using_infer_string):
         # GH#4947
         # logical ops should be label based
 
@@ -422,7 +428,20 @@ def test_logical_ops_label_based(self):
                 tm.assert_series_equal(result, a[a])
 
         for e in [Series(["z"])]:
-            result = a[a | e]
+            if using_infer_string:
+                import pyarrow as pa
+
+                # The pyarrow-backed string dtype has no kernel for this op, so
+                # it raises and there is no result to compare in this mode.
+                with tm.assert_produces_warning(
+                    FutureWarning, match="Operation between non"
+                ):
+                    with pytest.raises(
+                        pa.lib.ArrowNotImplementedError, match="has no kernel"
+                    ):
+                        a[a | e]
+            else:
+                result = a[a | e]
-            tm.assert_series_equal(result, a[a])
+                tm.assert_series_equal(result, a[a])
 
         # vs scalars
diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
index fbdf843a998bb..78a2819a49fc9 100644
--- a/pandas/tests/series/test_reductions.py
+++ b/pandas/tests/series/test_reductions.py
@@ -131,17 +131,22 @@ def test_validate_stat_keepdims():
         np.sum(ser, keepdims=True)
 
 
-def test_mean_with_convertible_string_raises(using_array_manager):
+def test_mean_with_convertible_string_raises(using_array_manager, using_infer_string):
     # GH#44008
     ser = Series(["1", "2"])
-    assert ser.sum() == "12"
-    msg = "Could not convert string '12' to numeric"
+    if using_infer_string:
+        msg = "does not support"
+        with pytest.raises(TypeError, match=msg):
+            ser.sum()
+    else:
+        assert ser.sum() == "12"
+    msg = "Could not convert string '12' to numeric|does not support"
     with pytest.raises(TypeError, match=msg):
         ser.mean()
 
     df = ser.to_frame()
     if not using_array_manager:
-        msg = r"Could not convert \['12'\] to numeric"
+        msg = r"Could not convert \['12'\] to numeric|does not support"
     with pytest.raises(TypeError, match=msg):
         df.mean()
 
@@ -152,29 +157,30 @@ def test_mean_dont_convert_j_to_complex(using_array_manager):
     if using_array_manager:
         msg = "Could not convert string 'J' to numeric"
     else:
-        msg = r"Could not convert \['J'\] to numeric"
+        msg = r"Could not convert \['J'\] to numeric|does not support"
     with pytest.raises(TypeError, match=msg):
         df.mean()
 
     with pytest.raises(TypeError, match=msg):
         df.agg("mean")
 
-    msg = "Could not convert string 'J' to numeric"
+    msg = "Could not convert string 'J' to numeric|does not support"
     with pytest.raises(TypeError, match=msg):
         df["db"].mean()
+    msg = "Could not convert string 'J' to numeric|ufunc 'divide'"
     with pytest.raises(TypeError, match=msg):
         np.mean(df["db"].astype("string").array)
 
 
 def test_median_with_convertible_string_raises(using_array_manager):
     # GH#34671 this _could_ return a string "2", but definitely not float 2.0
-    msg = r"Cannot convert \['1' '2' '3'\] to numeric"
+    msg = r"Cannot convert \['1' '2' '3'\] to numeric|does not support"
     ser = Series(["1", "2", "3"])
     with pytest.raises(TypeError, match=msg):
         ser.median()
 
     if not using_array_manager:
-        msg = r"Cannot convert \[\['1' '2' '3'\]\] to numeric"
+        msg = r"Cannot convert \[\['1' '2' '3'\]\] to numeric|does not support"
     df = ser.to_frame()
     with pytest.raises(TypeError, match=msg):
         df.median()