|
32 | 32 | from polars.datatypes import (
|
33 | 33 | INTEGER_DTYPES,
|
34 | 34 | TEMPORAL_DTYPES,
|
| 35 | + Array, |
35 | 36 | Boolean,
|
36 | 37 | Categorical,
|
37 | 38 | Date,
|
@@ -96,7 +97,7 @@ def sequence_to_pyseries(
|
96 | 97 | dtype = Null
|
97 | 98 |
|
98 | 99 | # lists defer to subsequent handling; identify nested type
|
99 |
| - elif dtype == List: |
| 100 | + elif dtype in (List, Array): |
100 | 101 | python_dtype = list
|
101 | 102 |
|
102 | 103 | # infer temporal type handling
|
@@ -130,8 +131,9 @@ def sequence_to_pyseries(
|
130 | 131 | # flat data
|
131 | 132 | if (
|
132 | 133 | dtype is not None
|
133 |
| - and dtype not in (List, Struct, Unknown) |
134 | 134 | and is_polars_dtype(dtype)
|
| 135 | + and not dtype.is_nested() |
| 136 | + and dtype != Unknown |
135 | 137 | and (python_dtype is None)
|
136 | 138 | ):
|
137 | 139 | constructor = polars_type_to_constructor(dtype)
|
@@ -160,159 +162,153 @@ def sequence_to_pyseries(
|
160 | 162 | schema=struct_schema,
|
161 | 163 | orient="row",
|
162 | 164 | ).to_struct(name)
|
163 |
| - else: |
164 |
| - if python_dtype is None: |
165 |
| - if value is None: |
166 |
| - constructor = polars_type_to_constructor(Null) |
167 |
| - return constructor(name, values, strict) |
168 |
| - |
169 |
| - # generic default dtype |
170 |
| - python_dtype = type(value) |
171 |
| - |
172 |
| - # temporal branch |
173 |
| - if python_dtype in py_temporal_types: |
174 |
| - if dtype is None: |
175 |
| - dtype = py_type_to_dtype(python_dtype) # construct from integer |
176 |
| - elif dtype in py_temporal_types: |
177 |
| - dtype = py_type_to_dtype(dtype) |
178 |
| - |
179 |
| - values_dtype = ( |
180 |
| - None |
181 |
| - if value is None |
182 |
| - else py_type_to_dtype(type(value), raise_unmatched=False) |
| 165 | + |
| 166 | + if python_dtype is None: |
| 167 | + if value is None: |
| 168 | + constructor = polars_type_to_constructor(Null) |
| 169 | + return constructor(name, values, strict) |
| 170 | + |
| 171 | + # generic default dtype |
| 172 | + python_dtype = type(value) |
| 173 | + |
| 174 | + # temporal branch |
| 175 | + if python_dtype in py_temporal_types: |
| 176 | + if dtype is None: |
| 177 | + dtype = py_type_to_dtype(python_dtype) # construct from integer |
| 178 | + elif dtype in py_temporal_types: |
| 179 | + dtype = py_type_to_dtype(dtype) |
| 180 | + |
| 181 | + values_dtype = ( |
| 182 | + None |
| 183 | + if value is None |
| 184 | + else py_type_to_dtype(type(value), raise_unmatched=False) |
| 185 | + ) |
| 186 | + if values_dtype is not None and values_dtype.is_float(): |
| 187 | + msg = f"'float' object cannot be interpreted as a {python_dtype.__name__!r}" |
| 188 | + raise TypeError( |
| 189 | + # we do not accept float values as temporal; if this is |
| 190 | + # required, the caller should explicitly cast to int first. |
| 191 | + msg |
183 | 192 | )
|
184 |
| - if values_dtype is not None and values_dtype.is_float(): |
185 |
| - msg = f"'float' object cannot be interpreted as a {python_dtype.__name__!r}" |
186 |
| - raise TypeError( |
187 |
| - # we do not accept float values as temporal; if this is |
188 |
| - # required, the caller should explicitly cast to int first. |
189 |
| - msg |
190 |
| - ) |
191 | 193 |
|
192 |
| - # We use the AnyValue builder to create the datetime array |
193 |
| - # We store the values internally as UTC and set the timezone |
194 |
| - py_series = PySeries.new_from_any_values(name, values, strict) |
| 194 | + # We use the AnyValue builder to create the datetime array |
| 195 | + # We store the values internally as UTC and set the timezone |
| 196 | + py_series = PySeries.new_from_any_values(name, values, strict) |
195 | 197 |
|
196 |
| - time_unit = getattr(dtype, "time_unit", None) |
197 |
| - time_zone = getattr(dtype, "time_zone", None) |
| 198 | + time_unit = getattr(dtype, "time_unit", None) |
| 199 | + time_zone = getattr(dtype, "time_zone", None) |
198 | 200 |
|
199 |
| - if time_unit is None or values_dtype == Date: |
200 |
| - s = wrap_s(py_series) |
201 |
| - else: |
202 |
| - s = wrap_s(py_series).dt.cast_time_unit(time_unit) |
| 201 | + if time_unit is None or values_dtype == Date: |
| 202 | + s = wrap_s(py_series) |
| 203 | + else: |
| 204 | + s = wrap_s(py_series).dt.cast_time_unit(time_unit) |
203 | 205 |
|
204 |
| - if (values_dtype == Date) & (dtype == Datetime): |
205 |
| - return ( |
206 |
| - s.cast(Datetime(time_unit or "us")) |
207 |
| - .dt.replace_time_zone(time_zone) |
208 |
| - ._s |
| 206 | + if (values_dtype == Date) & (dtype == Datetime): |
| 207 | + return ( |
| 208 | + s.cast(Datetime(time_unit or "us")).dt.replace_time_zone(time_zone)._s |
| 209 | + ) |
| 210 | + |
| 211 | + if (dtype == Datetime) and (value.tzinfo is not None or time_zone is not None): |
| 212 | + values_tz = str(value.tzinfo) if value.tzinfo is not None else None |
| 213 | + dtype_tz = dtype.time_zone # type: ignore[union-attr] |
| 214 | + if values_tz is not None and (dtype_tz is not None and dtype_tz != "UTC"): |
| 215 | + msg = ( |
| 216 | + "time-zone-aware datetimes are converted to UTC" |
| 217 | + "\n\nPlease either drop the time zone from the dtype, or set it to 'UTC'." |
| 218 | + " To convert to a different time zone, please use `.dt.convert_time_zone`." |
| 219 | + ) |
| 220 | + raise ValueError(msg) |
| 221 | + if values_tz != "UTC" and dtype_tz is None: |
| 222 | + warnings.warn( |
| 223 | + "Constructing a Series with time-zone-aware " |
| 224 | + "datetimes results in a Series with UTC time zone. " |
| 225 | + "To silence this warning, you can filter " |
| 226 | + "warnings of class TimeZoneAwareConstructorWarning, or " |
| 227 | + "set 'UTC' as the time zone of your datatype.", |
| 228 | + TimeZoneAwareConstructorWarning, |
| 229 | + stacklevel=find_stacklevel(), |
209 | 230 | )
|
| 231 | + return s.dt.replace_time_zone(dtype_tz or "UTC")._s |
| 232 | + return s._s |
210 | 233 |
|
211 |
| - if (dtype == Datetime) and ( |
212 |
| - value.tzinfo is not None or time_zone is not None |
213 |
| - ): |
214 |
| - values_tz = str(value.tzinfo) if value.tzinfo is not None else None |
215 |
| - dtype_tz = dtype.time_zone # type: ignore[union-attr] |
216 |
| - if values_tz is not None and ( |
217 |
| - dtype_tz is not None and dtype_tz != "UTC" |
218 |
| - ): |
219 |
| - msg = ( |
220 |
| - "time-zone-aware datetimes are converted to UTC" |
221 |
| - "\n\nPlease either drop the time zone from the dtype, or set it to 'UTC'." |
222 |
| - " To convert to a different time zone, please use `.dt.convert_time_zone`." |
223 |
| - ) |
224 |
| - raise ValueError(msg) |
225 |
| - if values_tz != "UTC" and dtype_tz is None: |
226 |
| - warnings.warn( |
227 |
| - "Constructing a Series with time-zone-aware " |
228 |
| - "datetimes results in a Series with UTC time zone. " |
229 |
| - "To silence this warning, you can filter " |
230 |
| - "warnings of class TimeZoneAwareConstructorWarning, or " |
231 |
| - "set 'UTC' as the time zone of your datatype.", |
232 |
| - TimeZoneAwareConstructorWarning, |
233 |
| - stacklevel=find_stacklevel(), |
234 |
| - ) |
235 |
| - return s.dt.replace_time_zone(dtype_tz or "UTC")._s |
236 |
| - return s._s |
| 234 | + elif ( |
| 235 | + _check_for_numpy(value) |
| 236 | + and isinstance(value, np.ndarray) |
| 237 | + and len(value.shape) == 1 |
| 238 | + ): |
| 239 | + n_elems = len(value) |
| 240 | + if all(len(v) == n_elems for v in values): |
| 241 | + # can take (much) faster path if all lists are the same length |
| 242 | + return numpy_to_pyseries( |
| 243 | + name, |
| 244 | + np.vstack(values), |
| 245 | + strict=strict, |
| 246 | + nan_to_null=nan_to_null, |
| 247 | + ) |
| 248 | + else: |
| 249 | + return PySeries.new_series_list( |
| 250 | + name, |
| 251 | + [ |
| 252 | + numpy_to_pyseries("", v, strict=strict, nan_to_null=nan_to_null) |
| 253 | + for v in values |
| 254 | + ], |
| 255 | + strict, |
| 256 | + ) |
237 | 257 |
|
238 |
| - elif ( |
239 |
| - _check_for_numpy(value) |
240 |
| - and isinstance(value, np.ndarray) |
241 |
| - and len(value.shape) == 1 |
242 |
| - ): |
243 |
| - n_elems = len(value) |
244 |
| - if all(len(v) == n_elems for v in values): |
245 |
| - # can take (much) faster path if all lists are the same length |
246 |
| - return numpy_to_pyseries( |
247 |
| - name, |
248 |
| - np.vstack(values), |
249 |
| - strict=strict, |
250 |
| - nan_to_null=nan_to_null, |
251 |
| - ) |
| 258 | + elif python_dtype in (list, tuple): |
| 259 | + if dtype is None: |
| 260 | + return PySeries.new_from_any_values(name, values, strict=strict) |
| 261 | + elif dtype == Object: |
| 262 | + return PySeries.new_object(name, values, strict) |
| 263 | + else: |
| 264 | + if (inner_dtype := getattr(dtype, "inner", None)) is not None: |
| 265 | + pyseries_list = [ |
| 266 | + None |
| 267 | + if value is None |
| 268 | + else sequence_to_pyseries( |
| 269 | + "", |
| 270 | + value, |
| 271 | + inner_dtype, |
| 272 | + strict=strict, |
| 273 | + nan_to_null=nan_to_null, |
| 274 | + ) |
| 275 | + for value in values |
| 276 | + ] |
| 277 | + pyseries = PySeries.new_series_list(name, pyseries_list, strict) |
252 | 278 | else:
|
253 |
| - return PySeries.new_series_list( |
254 |
| - name, |
255 |
| - [ |
256 |
| - numpy_to_pyseries("", v, strict=strict, nan_to_null=nan_to_null) |
257 |
| - for v in values |
258 |
| - ], |
259 |
| - strict, |
| 279 | + pyseries = PySeries.new_from_any_values_and_dtype( |
| 280 | + name, values, dtype, strict=strict |
260 | 281 | )
|
| 282 | + if dtype != pyseries.dtype(): |
| 283 | + pyseries = pyseries.cast(dtype, strict=False) |
| 284 | + return pyseries |
261 | 285 |
|
262 |
| - elif python_dtype in (list, tuple): |
263 |
| - if dtype is None: |
264 |
| - return PySeries.new_from_any_values(name, values, strict=strict) |
265 |
| - elif dtype == Object: |
266 |
| - return PySeries.new_object(name, values, strict) |
267 |
| - else: |
268 |
| - if (inner_dtype := getattr(dtype, "inner", None)) is not None: |
269 |
| - pyseries_list = [ |
270 |
| - None |
271 |
| - if value is None |
272 |
| - else sequence_to_pyseries( |
273 |
| - "", |
274 |
| - value, |
275 |
| - inner_dtype, |
276 |
| - strict=strict, |
277 |
| - nan_to_null=nan_to_null, |
278 |
| - ) |
279 |
| - for value in values |
280 |
| - ] |
281 |
| - pyseries = PySeries.new_series_list(name, pyseries_list, strict) |
| 286 | + elif python_dtype == pl.Series: |
| 287 | + return PySeries.new_series_list( |
| 288 | + name, [v._s if v is not None else None for v in values], strict |
| 289 | + ) |
| 290 | + |
| 291 | + elif python_dtype == PySeries: |
| 292 | + return PySeries.new_series_list(name, values, strict) |
| 293 | + else: |
| 294 | + constructor = py_type_to_constructor(python_dtype) |
| 295 | + if constructor == PySeries.new_object: |
| 296 | + try: |
| 297 | + srs = PySeries.new_from_any_values(name, values, strict) |
| 298 | + if _check_for_numpy(python_dtype, check_type=False) and isinstance( |
| 299 | + np.bool_(True), np.generic |
| 300 | + ): |
| 301 | + dtype = numpy_char_code_to_dtype(np.dtype(python_dtype).char) |
| 302 | + return srs.cast(dtype, strict=strict) |
282 | 303 | else:
|
283 |
| - pyseries = PySeries.new_from_any_values_and_dtype( |
284 |
| - name, values, dtype, strict=strict |
285 |
| - ) |
286 |
| - if dtype != pyseries.dtype(): |
287 |
| - pyseries = pyseries.cast(dtype, strict=False) |
288 |
| - return pyseries |
| 304 | + return srs |
289 | 305 |
|
290 |
| - elif python_dtype == pl.Series: |
291 |
| - return PySeries.new_series_list( |
292 |
| - name, [v._s if v is not None else None for v in values], strict |
293 |
| - ) |
| 306 | + except RuntimeError: |
| 307 | + return PySeries.new_from_any_values(name, values, strict=strict) |
294 | 308 |
|
295 |
| - elif python_dtype == PySeries: |
296 |
| - return PySeries.new_series_list(name, values, strict) |
297 |
| - else: |
298 |
| - constructor = py_type_to_constructor(python_dtype) |
299 |
| - if constructor == PySeries.new_object: |
300 |
| - try: |
301 |
| - srs = PySeries.new_from_any_values(name, values, strict) |
302 |
| - if _check_for_numpy(python_dtype, check_type=False) and isinstance( |
303 |
| - np.bool_(True), np.generic |
304 |
| - ): |
305 |
| - dtype = numpy_char_code_to_dtype(np.dtype(python_dtype).char) |
306 |
| - return srs.cast(dtype, strict=strict) |
307 |
| - else: |
308 |
| - return srs |
309 |
| - |
310 |
| - except RuntimeError: |
311 |
| - return PySeries.new_from_any_values(name, values, strict=strict) |
312 |
| - |
313 |
| - return _construct_series_with_fallbacks( |
314 |
| - constructor, name, values, dtype, strict=strict |
315 |
| - ) |
| 309 | + return _construct_series_with_fallbacks( |
| 310 | + constructor, name, values, dtype, strict=strict |
| 311 | + ) |
316 | 312 |
|
317 | 313 |
|
318 | 314 | def _construct_series_with_fallbacks(
|
|
0 commit comments