From 9dcece4719c17490d606bc2088188a7faa706800 Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Thu, 27 Mar 2025 13:19:38 -0700
Subject: [PATCH 1/6] fix small type

---
 pyiceberg/io/pyarrow.py          |  6 +--
 tests/io/test_pyarrow.py         | 90 ++++++++++++++++----------------
 tests/io/test_pyarrow_visitor.py |  4 +-
 tests/test_schema.py             |  2 +-
 4 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 522af0f344..5f88a37218 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -625,7 +625,7 @@ def field(self, field: NestedField, field_result: pa.DataType) -> pa.Field:
 
     def list(self, list_type: ListType, element_result: pa.DataType) -> pa.DataType:
         element_field = self.field(list_type.element_field, element_result)
-        return pa.large_list(value_type=element_field)
+        return pa.list_(value_type=element_field)
 
     def map(self, map_type: MapType, key_result: pa.DataType, value_result: pa.DataType) -> pa.DataType:
         key_field = self.field(map_type.key_field, key_result)
@@ -675,7 +675,7 @@ def visit_timestamptz_ns(self, _: TimestamptzNanoType) -> pa.DataType:
         return pa.timestamp(unit="ns", tz="UTC")
 
     def visit_string(self, _: StringType) -> pa.DataType:
-        return pa.large_string()
+        return pa.string()
 
     def visit_uuid(self, _: UUIDType) -> pa.DataType:
         return pa.binary(16)
@@ -684,7 +684,7 @@ def visit_unknown(self, _: UnknownType) -> pa.DataType:
         return pa.null()
 
     def visit_binary(self, _: BinaryType) -> pa.DataType:
-        return pa.large_binary()
+        return pa.binary()
 
 
 def _convert_scalar(value: Any, iceberg_type: IcebergType) -> pa.scalar:
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index e90f3a46fc..d63043b13c 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -406,7 +406,7 @@ def test_pyarrow_unified_session_properties() -> None:
 
 def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema) -> None:
     actual = schema_to_pyarrow(table_schema_nested)
-    expected = """foo: large_string
+    expected = """foo: string
   -- field metadata --
   PARQUET:field_id: '1'
 bar: int32 not null
@@ -415,20 +415,20 @@ def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema)
 baz: bool
   -- field metadata --
   PARQUET:field_id: '3'
-qux: large_list<element: large_string not null> not null
-  child 0, element: large_string not null
+qux: list<element: string not null> not null
+  child 0, element: string not null
     -- field metadata --
     PARQUET:field_id: '5'
   -- field metadata --
   PARQUET:field_id: '4'
-quux: map<large_string, map<large_string, int32>> not null
-  child 0, entries: struct<key: large_string not null, value: map<large_string, int32> not null> not null
-      child 0, key: large_string not null
+quux: map<string, map<string, int32>> not null
+  child 0, entries: struct<key: string not null, value: map<string, int32> not null> not null
+      child 0, key: string not null
       -- field metadata --
       PARQUET:field_id: '7'
-      child 1, value: map<large_string, int32> not null
-          child 0, entries: struct<key: large_string not null, value: int32 not null> not null
-              child 0, key: large_string not null
+      child 1, value: map<string, int32> not null
+          child 0, entries: struct<key: string not null, value: int32 not null> not null
+              child 0, key: string not null
               -- field metadata --
               PARQUET:field_id: '9'
               child 1, value: int32 not null
               -- field metadata --
               PARQUET:field_id: '10'
           -- field metadata --
           PARQUET:field_id: '8'
   -- field metadata --
   PARQUET:field_id: '6'
-location: large_list<element: struct<latitude: float, longitude: float> not null> not null
+location: list<element: struct<latitude: float, longitude: float> not null> not null
   child 0, element: struct<latitude: float, longitude: float> not null
       child 0, latitude: float
       -- field metadata --
       PARQUET:field_id: '13'
       child 1, longitude: float
       -- field metadata --
       PARQUET:field_id: '14'
   -- field metadata --
   PARQUET:field_id: '12'
   -- field metadata --
   PARQUET:field_id: '11'
-person: struct<name: large_string, age: int32 not null>
-  child 0, name: large_string
+person: struct<name: string, age: int32 not null>
+  child 0, name: string
      -- field metadata --
      PARQUET:field_id: '16'
   child 1, age: int32 not null
      -- field metadata --
      PARQUET:field_id: '17'
   -- field metadata --
   PARQUET:field_id: '15'"""
@@ -464,24 +464,24 @@ def test_schema_to_pyarrow_schema_exclude_field_ids(table_schema_nested: Schema)
     actual = schema_to_pyarrow(table_schema_nested, include_field_ids=False)
-    expected = """foo: large_string
+    expected = """foo: string
 bar: int32 not null
 baz: bool
-qux: large_list<element: large_string not null> not null
-  child 0, element: large_string not null
-quux: map<large_string, map<large_string, int32>> not null
-  child 0, entries: struct<key: large_string not null, value: map<large_string, int32> not null> not null
-      child 0, key: large_string not null
-      child 1, value: map<large_string, int32> not null
-          child 0, entries: struct<key: large_string not null, value: int32 not null> not null
-              child 0, key: large_string not null
+qux: list<element: string not null> not null
+  child 0, element: string not null
+quux: map<string, map<string, int32>> not null
+  child 0, entries: struct<key: string not null, value: map<string, int32> not null> not null
+      child 0, key: string not null
+      child 1, value: map<string, int32> not null
+          child 0, entries: struct<key: string not null, value: int32 not null> not null
+              child 0, key: string not null
               child 1, value: int32 not null
-location: large_list<element: struct<latitude: float, longitude: float> not null> not null
+location: list<element: struct<latitude: float, longitude: float> not null> not null
   child 0, element: struct<latitude: float, longitude: float> not null
       child 0, latitude: float
       child 1, longitude: float
-person: struct<name: large_string, age: int32 not null>
-  child 0, name: large_string
+person: struct<name: string, age: int32 not null>
+  child 0, name: string
   child 1, age: int32 not null"""
 
     assert repr(actual) == expected
@@ -546,18 +546,18 @@ def test_timestamptz_type_to_pyarrow() -> None:
 
 def test_string_type_to_pyarrow() -> None:
     iceberg_type = StringType()
-    assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.large_string()
+    assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.string()
 
 
 def test_binary_type_to_pyarrow() -> None:
     iceberg_type = BinaryType()
-    assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.large_binary()
+    assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.binary()
 
 
 def test_struct_type_to_pyarrow(table_schema_simple: Schema) -> None:
     expected = pa.struct(
         [
-            pa.field("foo", pa.large_string(), nullable=True, metadata={"field_id": "1"}),
+            pa.field("foo", pa.string(), nullable=True, metadata={"field_id": "1"}),
             pa.field("bar", pa.int32(), nullable=False, metadata={"field_id": "2"}),
             pa.field("baz", pa.bool_(), nullable=True, metadata={"field_id": "3"}),
         ]
@@ -575,7 +575,7 @@ def test_map_type_to_pyarrow() -> None:
     )
     assert visit(iceberg_map, _ConvertToArrowSchema()) == pa.map_(
         pa.field("key", pa.int32(), nullable=False, metadata={"field_id": "1"}),
-        pa.field("value", pa.large_string(), nullable=False, metadata={"field_id": "2"}),
+        pa.field("value", pa.string(), nullable=False, metadata={"field_id": "2"}),
     )
@@ -585,7 +585,7 @@ def test_list_type_to_pyarrow() -> None:
         element_type=IntegerType(),
         element_required=True,
     )
-    assert visit(iceberg_map, _ConvertToArrowSchema()) == pa.large_list(
+    assert visit(iceberg_map, _ConvertToArrowSchema()) == pa.list_(
         pa.field("element", pa.int32(), nullable=False, metadata={"field_id": "1"})
     )
@@ -668,11 +668,11 @@ def test_expr_less_than_or_equal_to_pyarrow(bound_reference: BoundReference[str]
 
 def test_expr_in_to_pyarrow(bound_reference: BoundReference[str]) -> None:
     assert repr(expression_to_pyarrow(BoundIn(bound_reference, {literal("hello"), literal("world")}))) in (
-        """<pyarrow.compute.Expression is_in(foo, {value_set=large_string:[
+        """<pyarrow.compute.Expression is_in(foo, {value_set=string:[
   "hello",
   "world"
 ], null_matching_behavior=MATCH})>""",
-        """<pyarrow.compute.Expression is_in(foo, {value_set=large_string:[
+        """<pyarrow.compute.Expression is_in(foo, {value_set=string:[
   "world",
   "hello"
 ], null_matching_behavior=MATCH})>""",
@@ -681,11 +681,11 @@ def test_expr_not_in_to_pyarrow(bound_reference: BoundReference[str]) -> None:
     assert repr(expression_to_pyarrow(BoundNotIn(bound_reference, {literal("hello"), literal("world")}))) in (
-        """<pyarrow.compute.Expression invert(is_in(foo, {value_set=large_string:[
+        """<pyarrow.compute.Expression invert(is_in(foo, {value_set=string:[
   "hello",
   "world"
 ], null_matching_behavior=MATCH}))>""",
-        """<pyarrow.compute.Expression invert(is_in(foo, {value_set=large_string:[
+        """<pyarrow.compute.Expression invert(is_in(foo, {value_set=string:[
   "world",
   "hello"
 ], null_matching_behavior=MATCH}))>""",
@@ -1030,12 +1030,12 @@ def test_projection_add_column(file_int: str) -> None:
     assert (
         repr(result_table.schema)
         == """id: int32
-list: large_list<element: int32>
+list: list<element: int32>
   child 0, element: int32
-map: map<int32, large_string>
-  child 0, entries: struct<key: int32 not null, value: large_string> not null
+map: map<int32, string>
+  child 0, entries: struct<key: int32 not null, value: string> not null
     child 0, key: int32 not null
-    child 1, value: large_string
+    child 1, value: string
 location: struct<lat: double, lon: double>
   child 0, lat: double
   child 1, lon: double"""
     )
@@ -1051,7 +1051,7 @@ def test_read_list(schema_list: Schema, file_list: str) -> None:
 
     assert (
         repr(result_table.schema)
-        == """ids: large_list<element: int32>
+        == """ids: list<element: int32>
   child 0, element: int32"""
     )
@@ -1088,10 +1088,10 @@ def test_projection_add_column_struct(schema_int: Schema, file_int: str) -> None
         assert r.as_py() is None
     assert (
         repr(result_table.schema)
-        == """id: map<int32, large_string>
-  child 0, entries: struct<key: int32 not null, value: large_string> not null
+        == """id: map<int32, string>
+  child 0, entries: struct<key: int32 not null, value: string> not null
     child 0, key: int32 not null
-    child 1, value: large_string"""
+    child 1, value: string"""
     )
@@ -1422,7 +1422,7 @@ def test_projection_list_of_structs(schema_list_of_structs: Schema, file_list_of
     ]
     assert (
         repr(result_table.schema)
-        == """locations: large_list<element: struct<latitude: double not null, longitude: double not null>>
+        == """locations: list<element: struct<latitude: double not null, longitude: double not null>>
   child 0, element: struct<latitude: double not null, longitude: double not null>
     child 0, latitude: double not null
     child 1, longitude: double not null
@@ -1569,7 +1569,7 @@ def test_delete(deletes_file: str, example_task: FileScanTask, table_schema_simp
     assert (
         str(with_deletes)
         == """pyarrow.Table
-foo: large_string
+foo: string
 bar: int32 not null
 baz: bool
 ----
@@ -1606,7 +1606,7 @@ def test_delete_duplicates(deletes_file: str, example_task: FileScanTask, table_
     assert (
         str(with_deletes)
         == """pyarrow.Table
-foo: large_string
+foo: string
 bar: int32 not null
 baz: bool
 ----
@@ -1637,7 +1637,7 @@ def test_pyarrow_wrap_fsspec(example_task: FileScanTask, table_schema_simple: Sc
     assert (
         str(projection)
         == """pyarrow.Table
-foo: large_string
+foo: string
 bar: int32 not null
 baz: bool
 ----
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index 9d5772d01c..6da7219c44 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -229,14 +229,14 @@ def test_pyarrow_timestamp_tz_invalid_tz() -> None:
 def test_pyarrow_string_to_iceberg(pyarrow_type: pa.DataType) -> None:
     converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
     assert converted_iceberg_type == StringType()
-    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.large_string()
+    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.string()
 
 
 @pytest.mark.parametrize("pyarrow_type", [pa.binary(), pa.large_binary(), pa.binary_view()])
 def test_pyarrow_variable_binary_to_iceberg(pyarrow_type: pa.DataType) -> None:
     converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
     assert converted_iceberg_type == BinaryType()
-    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.large_binary()
+    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.binary()
 
 
 def test_pyarrow_struct_to_iceberg() -> None:
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 3ca74c4027..a7fab18478 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -1648,7 +1648,7 @@ def test_arrow_schema() -> None:
 
     expected_schema = pa.schema(
         [
-            pa.field("foo", pa.large_string(), nullable=False),
+            pa.field("foo", pa.string(), nullable=False),
             pa.field("bar", pa.int32(), nullable=True),
             pa.field("baz", pa.bool_(), nullable=True),
         ]
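Patch 1 above is the core change: `_ConvertToArrowSchema` now maps Iceberg `string`, `binary`, and `list` to the standard 32-bit-offset Arrow types instead of the `large_*` variants, which is what every expected-repr update in the tests reflects. A minimal sketch of the observable effect, assuming a pyiceberg build that includes this patch (`schema_to_pyarrow` is the same public helper the tests exercise):

```python
import pyarrow as pa

from pyiceberg.io.pyarrow import schema_to_pyarrow
from pyiceberg.schema import Schema
from pyiceberg.types import BinaryType, NestedField, StringType

schema = Schema(
    NestedField(field_id=1, name="foo", field_type=StringType(), required=False),
    NestedField(field_id=2, name="blob", field_type=BinaryType(), required=False),
)

arrow_schema = schema_to_pyarrow(schema, include_field_ids=False)
# With this patch applied, the conversion yields the small types:
assert arrow_schema.field("foo").type == pa.string()   # previously pa.large_string()
assert arrow_schema.field("blob").type == pa.binary()  # previously pa.large_binary()
```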
From 4380c3ea8b97176287f133ded470667cbecbc20c Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Thu, 27 Mar 2025 13:22:25 -0700
Subject: [PATCH 2/6] update doc

---
 mkdocs/docs/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
index 1e364a11fe..9e1b84a3b3 100644
--- a/mkdocs/docs/configuration.md
+++ b/mkdocs/docs/configuration.md
@@ -199,7 +199,7 @@ PyIceberg uses [S3FileSystem](https://arrow.apache.org/docs/python/generated/pya
 
 | Key                             | Example | Description                                                                                                                                                                                                                                                                                                                                                       |
 | ------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| pyarrow.use-large-types-on-read | True    | Use large PyArrow types i.e. [large_string](https://arrow.apache.org/docs/python/generated/pyarrow.large_string.html), [large_binary](https://arrow.apache.org/docs/python/generated/pyarrow.large_binary.html) and [large_list](https://arrow.apache.org/docs/python/generated/pyarrow.large_list.html) field types on table scans. The default value is True.  |
+| pyarrow.use-large-types-on-read | False   | Use large PyArrow types i.e. [large_string](https://arrow.apache.org/docs/python/generated/pyarrow.large_string.html), [large_binary](https://arrow.apache.org/docs/python/generated/pyarrow.large_binary.html) and [large_list](https://arrow.apache.org/docs/python/generated/pyarrow.large_list.html) field types on table scans. The default value is False. |

From 191ff95287fb1b629f0da201438abcc4a798e116 Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Thu, 27 Mar 2025 13:54:01 -0700
Subject: [PATCH 3/6] make PYARROW_USE_LARGE_TYPES_ON_READ work

---
 pyiceberg/io/pyarrow.py         |  2 +-
 tests/integration/test_reads.py | 15 +++++++++------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 5f88a37218..18dfd47dc5 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1612,7 +1612,7 @@ def _table_from_scan_task(task: FileScanTask) -> pa.Table:
                     removed_in="0.11.0",
                     help_message=f"Property `{PYARROW_USE_LARGE_TYPES_ON_READ}` will be removed.",
                 )
-                result = result.cast(arrow_schema)
+                result = result.cast(_pyarrow_schema_ensure_large_types(arrow_schema))
 
         if self._limit is not None:
             return result.slice(0, self._limit)
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index 5ac5162f8e..44718215b8 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -872,9 +872,12 @@ def test_table_scan_keep_types(catalog: Catalog) -> None:
 
 
 @pytest.mark.integration
+@pytest.mark.filterwarnings(
+    "ignore:Deprecated in 0.10.0, will be removed in 0.11.0. Property `pyarrow.use-large-types-on-read` will be removed.:DeprecationWarning"
+)
 @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
-def test_table_scan_override_with_small_types(catalog: Catalog) -> None:
-    identifier = "default.test_table_scan_override_with_small_types"
+def test_table_scan_override_with_large_types(catalog: Catalog) -> None:
+    identifier = "default.test_table_scan_override_with_large_types"
     arrow_table = pa.Table.from_arrays(
         [
             pa.array(["a", "b", "c"]),
@@ -900,15 +903,15 @@ def test_table_scan_override_with_small_types(catalog: Catalog) -> None:
     with tbl.update_schema() as update_schema:
         update_schema.update_column("string-to-binary", BinaryType())
 
-    tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "False"
+    tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "True"
     result_table = tbl.scan().to_arrow()
 
     expected_schema = pa.schema(
         [
-            pa.field("string", pa.string()),
+            pa.field("string", pa.large_string()),
             pa.field("string-to-binary", pa.large_binary()),
-            pa.field("binary", pa.binary()),
-            pa.field("list", pa.list_(pa.string())),
+            pa.field("binary", pa.large_binary()),
+            pa.field("list", pa.large_list(pa.large_string())),
         ]
     )
     assert result_table.schema.equals(expected_schema)
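Patch 3 keeps the deprecated `pyarrow.use-large-types-on-read` property working as an explicit opt-in: when it is set, the scan result is cast through `_pyarrow_schema_ensure_large_types` rather than the plain requested schema. A sketch of the opt-in path the renamed integration test exercises; the catalog name and table identifier here are hypothetical:

```python
from pyiceberg.catalog import load_catalog

catalog = load_catalog("default")             # hypothetical catalog config
tbl = catalog.load_table("default.my_table")  # hypothetical table

# Scans now default to small Arrow types. Setting the deprecated property
# (the PYARROW_USE_LARGE_TYPES_ON_READ constant in the diff) back to "True"
# forces large_string/large_binary/large_list on the result and emits a
# DeprecationWarning, matching the filterwarnings marker above.
tbl.io.properties["pyarrow.use-large-types-on-read"] = "True"
result = tbl.scan().to_arrow()  # string columns come back as large_string
```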
From 06f785784afb5347b1d6fc185f0a25f84242a405 Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Thu, 27 Mar 2025 13:58:43 -0700
Subject: [PATCH 4/6] ensure large type

---
 pyiceberg/io/pyarrow.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 18dfd47dc5..2ec875fef5 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1718,8 +1718,8 @@ def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
             target_schema = schema_to_pyarrow(
                 promote(file_field.field_type, field.field_type), include_field_ids=self._include_field_ids
             )
-            if self._use_large_types is False:
-                target_schema = _pyarrow_schema_ensure_small_types(target_schema)
+            if self._use_large_types is True:
+                target_schema = _pyarrow_schema_ensure_large_types(target_schema)
             return values.cast(target_schema)
         elif (target_type := schema_to_pyarrow(field.field_type, include_field_ids=self._include_field_ids)) != values.type:
             if field.field_type == TimestampType():

From b84d6e46a4681a8a1cf1dd94442e5538eec7b9b2 Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Mon, 31 Mar 2025 16:07:43 -0400
Subject: [PATCH 5/6] Update mkdocs/docs/configuration.md

Co-authored-by: Fokko Driesprong
---
 mkdocs/docs/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
index 9e1b84a3b3..ce95c091d1 100644
--- a/mkdocs/docs/configuration.md
+++ b/mkdocs/docs/configuration.md
@@ -199,7 +199,7 @@ PyIceberg uses [S3FileSystem](https://arrow.apache.org/docs/python/generated/pya
 
 | Key                             | Example | Description                                                                                                                                                                                                                                                                                                                                                       |
 | ------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| pyarrow.use-large-types-on-read | False   | Use large PyArrow types i.e. [large_string](https://arrow.apache.org/docs/python/generated/pyarrow.large_string.html), [large_binary](https://arrow.apache.org/docs/python/generated/pyarrow.large_binary.html) and [large_list](https://arrow.apache.org/docs/python/generated/pyarrow.large_list.html) field types on table scans. The default value is False. |
+| pyarrow.use-large-types-on-read | False   | Force large PyArrow types i.e. [large_string](https://arrow.apache.org/docs/python/generated/pyarrow.large_string.html), [large_binary](https://arrow.apache.org/docs/python/generated/pyarrow.large_binary.html) and [large_list](https://arrow.apache.org/docs/python/generated/pyarrow.large_list.html) field types on table scans. The default value is False. |
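Patch 4 makes type promotion honor the same opt-in: the promoted target schema is widened to large types only when `_use_large_types` is explicitly `True`, instead of being shrunk when it is `False`. Conceptually, the ensure-large-types step just widens offset widths; the sketch below is an illustrative re-implementation (`ensure_large` is a made-up name — the real `_pyarrow_schema_ensure_large_types` in `pyiceberg.io.pyarrow` is visitor-based and also covers maps and structs):

```python
import pyarrow as pa

def ensure_large(t: pa.DataType) -> pa.DataType:
    # Widen 32-bit-offset types to their 64-bit "large" counterparts.
    if pa.types.is_string(t):
        return pa.large_string()
    if pa.types.is_binary(t):
        return pa.large_binary()
    if pa.types.is_list(t):
        return pa.large_list(ensure_large(t.value_type))
    return t

schema = pa.schema([("s", pa.string()), ("l", pa.list_(pa.string()))])
large = pa.schema([(f.name, ensure_large(f.type)) for f in schema])
assert large.field("s").type == pa.large_string()
assert large.field("l").type == pa.large_list(pa.large_string())
```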
From 79a80c25111223c37c7cd8502f6de59e0f5e5542 Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Fri, 25 Apr 2025 09:20:45 -0700
Subject: [PATCH 6/6] a few more

---
 mkdocs/docs/api.md                          | 16 ++++++++--------
 tests/catalog/test_sql.py                   | 10 +++++-----
 tests/conftest.py                           |  6 +++---
 .../test_writes/test_partitioned_writes.py  |  2 +-
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md
index d84c82ec2a..9fb5947059 100644
--- a/mkdocs/docs/api.md
+++ b/mkdocs/docs/api.md
@@ -418,7 +418,7 @@ This produces the following result with `tbl.scan().to_arrow()`:
 
 ```python
 pyarrow.Table
-city: large_string
+city: string
 lat: double
 long: double
 ----
@@ -476,7 +476,7 @@ This produces the following result with `tbl.scan().to_arrow()`:
 
 ```python
 pyarrow.Table
-city: large_string
+city: string
 lat: double
 long: double
 ----
@@ -957,14 +957,14 @@ split_offsets: list<item: int64>
 equality_ids: list<item: int32>
   child 0, item: int32
 sort_order_id: int32
-readable_metrics: struct<city: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: large_string, upper_bound: large_string> not null, lat: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: double, upper_bound: double> not null, long: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: double, upper_bound: double> not null>
+readable_metrics: struct<city: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: string, upper_bound: string> not null, lat: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: double, upper_bound: double> not null, long: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: double, upper_bound: double> not null>
   child 0, city: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: large_string, upper_bound: large_string> not null
       child 0, column_size: int64
       child 1, value_count: int64
       child 2, null_value_count: int64
       child 3, nan_value_count: int64
-      child 4, lower_bound: large_string
-      child 5, upper_bound: large_string
+      child 4, lower_bound: string
+      child 5, upper_bound: string
   child 1, lat: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: double, upper_bound: double> not null
       child 0, column_size: int64
       child 1, value_count: int64
       child 2, null_value_count: int64
       child 3, nan_value_count: int64
@@ -998,19 +998,19 @@ equality_ids:[[[],[]]]
 sort_order_id:[[[],[]]]
 readable_metrics: [
   -- is_valid: all not null
-  -- child 0 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: large_string, upper_bound: large_string>
+  -- child 0 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: string, upper_bound: string>
     -- is_valid: all not null
     -- child 0 type: int64
 [140]
     -- child 1 type: int64
 [4]
     -- child 2 type: int64
 [0]
     -- child 3 type: int64
 [null]
-    -- child 4 type: large_string
+    -- child 4 type: string
 ["Amsterdam"]
-    -- child 5 type: large_string
+    -- child 5 type: string
 ["San Francisco"]
   -- child 1 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: double, upper_bound: double>
     -- is_valid: all not null
diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py
index 8c3047b2ca..3482302c90 100644
--- a/tests/catalog/test_sql.py
+++ b/tests/catalog/test_sql.py
@@ -404,7 +404,7 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, table_identifier: Identifier)
         ],
         schema=pa.schema(
             [
-                pa.field("foo", pa.large_string(), nullable=True),
+                pa.field("foo", pa.string(), nullable=True),
                 pa.field("bar", pa.int32(), nullable=False),
                 pa.field("baz", pa.bool_(), nullable=True),
                 pa.field("large", pa.large_string(), nullable=True),
@@ -1462,7 +1462,7 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None:
         {
             "foo": ["a", None, "z"],
         },
-        schema=pa.schema([pa.field("foo", pa.large_string(), nullable=True)]),
+        schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]),
     )
 
     tbl = catalog.create_table(identifier=identifier, schema=pa_table.schema, properties={"format-version": str(format_version)})
@@ -1474,7 +1474,7 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None:
         },
         schema=pa.schema(
             [
-                pa.field("foo", pa.large_string(), nullable=True),
+                pa.field("foo", pa.string(), nullable=True),
                 pa.field("bar", pa.int32(), nullable=True),
             ]
         ),
@@ -1514,7 +1514,7 @@ def test_create_table_transaction(catalog: SqlCatalog, format_version: int) -> N
         {
             "foo": ["a", None, "z"],
         },
-        schema=pa.schema([pa.field("foo", pa.large_string(), nullable=True)]),
+        schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]),
     )
 
     pa_table_with_column = pa.Table.from_pydict(
@@ -1524,7 +1524,7 @@ def test_create_table_transaction(catalog: SqlCatalog, format_version: int) -> N
         },
         schema=pa.schema(
             [
-                pa.field("foo", pa.large_string(), nullable=True),
+                pa.field("foo", pa.string(), nullable=True),
                 pa.field("bar", pa.int32(), nullable=True),
             ]
         ),
diff --git a/tests/conftest.py b/tests/conftest.py
index 09f3a15d56..b142612553 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2510,8 +2510,8 @@ def pa_schema() -> "pa.Schema":
     return pa.schema(
         [
             ("bool", pa.bool_()),
-            ("string", pa.large_string()),
-            ("string_long", pa.large_string()),
+            ("string", pa.string()),
+            ("string_long", pa.string()),
             ("int", pa.int32()),
             ("long", pa.int64()),
             ("float", pa.float32()),
@@ -2525,7 +2525,7 @@ def pa_schema() -> "pa.Schema":
             # ("time", pa.time64("us")),
             # Not natively supported by Arrow
             # ("uuid", pa.fixed(16)),
-            ("binary", pa.large_binary()),
+            ("binary", pa.binary()),
             ("fixed", pa.binary(16)),
         ]
     )
diff --git a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py
index a299036e6b..268591ab9d 100644
--- a/tests/integration/test_writes/test_partitioned_writes.py
+++ b/tests/integration/test_writes/test_partitioned_writes.py
@@ -891,7 +891,7 @@ def test_unsupported_transform(
 
     with pytest.raises(
         ValueError,
-        match="FeatureUnsupported => Unsupported data type for truncate transform: LargeBinary",
+        match="FeatureUnsupported => Unsupported data type for truncate transform: Binary",
     ):
         tbl.append(arrow_table_with_null)
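A closing note on why flipping the default is safe for typical workloads: the small and large Arrow types describe the same values and differ only in offset width (32-bit vs. 64-bit), so converting between them is lossless as long as each array stays under the 2 GB offset limit — for example:

```python
import pyarrow as pa

small = pa.array(["hello", "world"], type=pa.string())
large = small.cast(pa.large_string())

assert small.to_pylist() == large.to_pylist()  # identical values either way
assert large.type == pa.large_string()
```

Tables that genuinely need 64-bit offsets can keep opting in through `pyarrow.use-large-types-on-read` until the property is removed in 0.11.0.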