@@ -53,6 +53,7 @@ def read_csv(
53
53
ignore_errors : bool = False ,
54
54
try_parse_dates : bool = False ,
55
55
n_threads : int | None = None ,
56
+ infer_schema : bool = True ,
56
57
infer_schema_length : int | None = N_INFER_DEFAULT ,
57
58
batch_size : int = 8192 ,
58
59
n_rows : int | None = None ,
@@ -126,7 +127,7 @@ def read_csv(
126
127
Before using this option, try to increase the number of lines used for schema
127
128
inference with e.g. `infer_schema_length=10000` or override automatic dtype
128
129
inference for specific columns with the `schema_overrides` option or use
129
- `infer_schema_length=0 ` to read all columns as `pl.String` to check which
130
+ `infer_schema=False` to read all columns as `pl.String` to check which
130
131
values might cause an issue.
131
132
try_parse_dates
132
133
Try to automatically parse dates. Most ISO8601-like formats can
@@ -136,10 +137,15 @@ def read_csv(
136
137
n_threads
137
138
Number of threads to use in csv parsing.
138
139
Defaults to the number of physical CPUs of your system.
140
+ infer_schema
141
+ When `True`, the schema is inferred from the data using the first
142
+ `infer_schema_length` rows.
143
+ When `False`, the schema is not inferred and each column will be `pl.String` if not
144
+ specified in `schema` or `schema_overrides`.
139
145
infer_schema_length
140
146
The maximum number of rows to scan for schema inference.
141
- If set to `0`, all columns will be read as `pl.String`.
142
147
If set to `None`, the full data may be scanned *(this is slow)*.
148
+ Set `infer_schema=False` to read all columns as `pl.String`.
143
149
batch_size
144
150
Number of lines to read into the buffer at once.
145
151
Modify this to change performance.
@@ -184,7 +190,7 @@ def read_csv(
184
190
with windows line endings (`\r\n`), one can go with the default `\n`. The extra
185
191
`\r` will be removed when processed.
186
192
raise_if_empty
187
- When there is no data in the source,`NoDataError` is raised. If this parameter
193
+ When there is no data in the source, `NoDataError` is raised. If this parameter
188
194
is set to False, an empty DataFrame (with no columns) is returned instead.
189
195
truncate_ragged_lines
190
196
Truncate lines that are longer than the schema.
@@ -410,6 +416,9 @@ def read_csv(
410
416
for column_name , column_dtype in schema_overrides .items ()
411
417
}
412
418
419
+ if not infer_schema :
420
+ infer_schema_length = 0
421
+
413
422
with prepare_file_arg (
414
423
source ,
415
424
encoding = encoding ,
@@ -922,6 +931,7 @@ def scan_csv(
922
931
ignore_errors : bool = False ,
923
932
cache : bool = True ,
924
933
with_column_names : Callable [[list [str ]], list [str ]] | None = None ,
934
+ infer_schema : bool = True ,
925
935
infer_schema_length : int | None = N_INFER_DEFAULT ,
926
936
n_rows : int | None = None ,
927
937
encoding : CsvEncoding = "utf8" ,
@@ -989,17 +999,22 @@ def scan_csv(
989
999
utf8 values to be treated as the empty string you can set this param True.
990
1000
ignore_errors
991
1001
Try to keep reading lines if some lines yield errors.
992
- First try `infer_schema_length=0 ` to read all columns as
1002
+ First try `infer_schema=False` to read all columns as
993
1003
`pl.String` to check which values might cause an issue.
994
1004
cache
995
1005
Cache the result after reading.
996
1006
with_column_names
997
1007
Apply a function over the column names just in time (when they are determined);
998
1008
this function will receive (and should return) a list of column names.
1009
+ infer_schema
1010
+ When `True`, the schema is inferred from the data using the first
1011
+ `infer_schema_length` rows.
1012
+ When `False`, the schema is not inferred and each column will be `pl.String` if not
1013
+ specified in `schema` or `schema_overrides`.
999
1014
infer_schema_length
1000
1015
The maximum number of rows to scan for schema inference.
1001
- If set to `0`, all columns will be read as `pl.String`.
1002
1016
If set to `None`, the full data may be scanned *(this is slow)*.
1017
+ Set `infer_schema=False` to read all columns as `pl.String`.
1003
1018
n_rows
1004
1019
Stop reading from CSV file after reading `n_rows`.
1005
1020
encoding : {'utf8', 'utf8-lossy'}
@@ -1029,7 +1044,7 @@ def scan_csv(
1029
1044
scanning a headerless CSV file). If the given list is shorter than the width of
1030
1045
the DataFrame the remaining columns will have their original name.
1031
1046
raise_if_empty
1032
- When there is no data in the source,`NoDataError` is raised. If this parameter
1047
+ When there is no data in the source, `NoDataError` is raised. If this parameter
1033
1048
is set to False, an empty LazyFrame (with no columns) is returned instead.
1034
1049
truncate_ragged_lines
1035
1050
Truncate lines that are longer than the schema.
@@ -1153,6 +1168,9 @@ def with_column_names(cols: list[str]) -> list[str]:
1153
1168
normalize_filepath (source , check_not_directory = False ) for source in source
1154
1169
]
1155
1170
1171
+ if not infer_schema :
1172
+ infer_schema_length = 0
1173
+
1156
1174
return _scan_csv_impl (
1157
1175
source ,
1158
1176
has_header = has_header ,
0 commit comments