Skip to content

Commit

Permalink
fix(python): Make Series.__getitem__ raise an IndexError (#11061)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Sep 13, 2023
1 parent 8cdb310 commit 4eeaa00
Show file tree
Hide file tree
Showing 16 changed files with 200 additions and 81 deletions.
4 changes: 2 additions & 2 deletions crates/polars-core/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ impl Schema {
) -> PolarsResult<Self> {
polars_ensure!(
index <= self.len(),
ComputeError:
OutOfBounds:
"index {} is out of bounds for schema with length {} (the max index allowed is self.len())",
index,
self.len()
Expand Down Expand Up @@ -167,7 +167,7 @@ impl Schema {
) -> PolarsResult<Option<DataType>> {
polars_ensure!(
index <= self.len(),
ComputeError:
OutOfBounds:
"index {} is out of bounds for schema with length {} (the max index allowed is self.len())",
index,
self.len()
Expand Down
5 changes: 4 additions & 1 deletion crates/polars-error/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ pub enum PolarsError {
Io(#[from] io::Error),
#[error("no data: {0}")]
NoData(ErrString),
#[error("{0}")]
OutOfBounds(ErrString),
#[error("field not found: {0}")]
SchemaFieldNotFound(ErrString),
#[error("data types don't match: {0}")]
Expand Down Expand Up @@ -105,6 +107,7 @@ impl PolarsError {
InvalidOperation(msg) => InvalidOperation(func(msg).into()),
Io(err) => ComputeError(func(&format!("IO: {err}")).into()),
NoData(msg) => NoData(func(msg).into()),
OutOfBounds(msg) => OutOfBounds(func(msg).into()),
SchemaFieldNotFound(msg) => SchemaFieldNotFound(func(msg).into()),
SchemaMismatch(msg) => SchemaMismatch(func(msg).into()),
ShapeMismatch(msg) => ShapeMismatch(func(msg).into()),
Expand Down Expand Up @@ -205,7 +208,7 @@ on startup."#.trim_start())
polars_err!(Duplicate: "column with name '{}' has more than one occurrences", $name)
};
(oob = $idx:expr, $len:expr) => {
polars_err!(ComputeError: "index {} is out of bounds for sequence of size {}", $idx, $len)
polars_err!(OutOfBounds: "index {} is out of bounds for sequence of length {}", $idx, $len)
};
(agg_len = $agg_len:expr, $groups_len:expr) => {
polars_err!(
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-io/src/csv/read_impl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ impl<'a> CoreReader<'a> {
for i in projection {
let (_, dtype) = self.schema.get_at_index(*i).ok_or_else(|| {
polars_err!(
ComputeError:
OutOfBounds:
"projection index {} is out of bounds for CSV schema with {} columns",
i, self.schema.len(),
)
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/series/ops/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ where
Ok(IdxSize::try_from(v).unwrap())
} else {
IdxSize::from_i64(len + v.to_i64().unwrap()).ok_or_else(|| {
PolarsError::ComputeError(
PolarsError::OutOfBounds(
format!(
"index {} is out of bounds for series of len {}",
v, target_len
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
DuplicateError,
InvalidOperationError,
NoDataError,
OutOfBoundsError,
PolarsPanicError,
SchemaError,
SchemaFieldNotFoundError,
Expand Down Expand Up @@ -202,6 +203,7 @@
"DuplicateError",
"InvalidOperationError",
"NoDataError",
"OutOfBoundsError",
"PolarsPanicError",
"SchemaError",
"SchemaFieldNotFoundError",
Expand Down
9 changes: 5 additions & 4 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1853,10 +1853,11 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any:
if row is None and column is None:
if self.shape != (1, 1):
raise ValueError(
f"can only call `.item()` if the dataframe is of shape (1, 1), or if"
f" explicit row/col values are provided; frame has shape {self.shape!r}"
"can only call `.item()` if the dataframe is of shape (1, 1),"
" or if explicit row/col values are provided;"
f" frame has shape {self.shape!r}"
)
return self._df.select_at_idx(0).get_idx(0)
return self._df.select_at_idx(0).get_index(0)

elif row is None or column is None:
raise ValueError("cannot call `.item()` with only one of `row` or `column`")
Expand All @@ -1868,7 +1869,7 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any:
)
if s is None:
raise IndexError(f"column index {column!r} is out of bounds")
return s.get_idx(row)
return s.get_index_signed(row)

def to_arrow(self) -> pa.Table:
"""
Expand Down
11 changes: 8 additions & 3 deletions py-polars/polars/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
DuplicateError,
InvalidOperationError,
NoDataError,
OutOfBoundsError,
PolarsPanicError,
SchemaError,
SchemaFieldNotFoundError,
Expand Down Expand Up @@ -35,6 +36,12 @@ class InvalidOperationError(Exception): # type: ignore[no-redef]
class NoDataError(Exception): # type: ignore[no-redef]
"""Exception raised when an operation can not be performed on an empty data structure.""" # noqa: W505

class OutOfBoundsError(Exception): # type: ignore[no-redef]
"""Exception raised when the given index is out of bounds."""

class PolarsPanicError(Exception): # type: ignore[no-redef]
"""Exception raised when an unexpected state causes a panic in the underlying Rust library.""" # noqa: W505

class SchemaError(Exception): # type: ignore[no-redef]
"""Exception raised when trying to combine data structures with mismatched schemas.""" # noqa: W505

Expand All @@ -50,9 +57,6 @@ class StringCacheMismatchError(Exception): # type: ignore[no-redef]
class StructFieldNotFoundError(Exception): # type: ignore[no-redef]
"""Exception raised when a specified schema field is not found."""

class PolarsPanicError(Exception): # type: ignore[no-redef]
"""Exception raised when an unexpected state causes a panic in the underlying Rust library.""" # noqa: W505


class ChronoFormatWarning(Warning):
"""
Expand Down Expand Up @@ -103,6 +107,7 @@ class UnsuitableSQLError(ValueError):
"InvalidOperationError",
"NoDataError",
"NoRowsReturnedError",
"OutOfBoundsError",
"PolarsInefficientMapWarning",
"PolarsPanicError",
"RowsError",
Expand Down
40 changes: 21 additions & 19 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,11 +933,11 @@ def __contains__(self, item: Any) -> bool:
def __iter__(self) -> Generator[Any, None, None]:
if self.dtype == List:
# TODO: either make a change and return py-native list data here, or find
# a faster way to return nested/List series; sequential 'get_idx' calls
# a faster way to return nested/List series; sequential 'get_index' calls
# make this path a lot slower (~10x) than it needs to be.
get_idx = self._s.get_idx
get_index = self._s.get_index
for idx in range(self.len()):
yield get_idx(idx)
yield get_index(idx)
else:
buffer_size = 25_000
for offset in range(0, self.len(), buffer_size):
Expand Down Expand Up @@ -1019,17 +1019,15 @@ def __getitem__(
elif _check_for_numpy(item) and isinstance(item, np.ndarray):
return self._take_with_series(numpy_to_idxs(item, self.len()))

# Integer.
# Integer
elif isinstance(item, int):
if item < 0:
item = self.len() + item
return self._s.get_idx(item)
return self._s.get_index_signed(item)

# Slice.
# Slice
elif isinstance(item, slice):
return PolarsSlice(self).apply(item)

# Range.
# Range
elif isinstance(item, range):
return self[range_to_slice(item)]

Expand Down Expand Up @@ -1191,12 +1189,13 @@ def _repr_html_(self) -> str:
"""Format output data in HTML for display in Jupyter Notebooks."""
return self.to_frame()._repr_html_(from_series=True)

def item(self, row: int | None = None) -> Any:
@deprecate_renamed_parameter("row", "index", version="0.19.3")
def item(self, index: int | None = None) -> Any:
"""
Return the series as a scalar, or return the element at the given row index.
Return the series as a scalar, or return the element at the given index.
If no row index is provided, this is equivalent to ``s[0]``, with a check
that the shape is (1,). With a row index, this is equivalent to ``s[row]``.
If no index is provided, this is equivalent to ``s[0]``, with a check
that the shape is (1,). With an index, this is equivalent to ``s[index]``.
Examples
--------
Expand All @@ -1208,12 +1207,15 @@ def item(self, row: int | None = None) -> Any:
24
"""
if row is None and len(self) != 1:
raise ValueError(
f"can only call '.item()' if the series is of length 1, or an"
f" explicit row index is provided (series is of length {len(self)})"
)
return self[row or 0]
if index is None:
if len(self) != 1:
raise ValueError(
"can only call '.item()' if the series is of length 1,"
f" or an explicit index is provided (series is of length {len(self)})"
)
return self._s.get_index(0)

return self._s.get_index_signed(index)

def estimated_size(self, unit: SizeUnit = "b") -> int | float:
"""
Expand Down
2 changes: 2 additions & 0 deletions py-polars/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ impl std::convert::From<PyPolarsErr> for PyErr {
},
PolarsError::Io(err) => PyIOError::new_err(err.to_string()),
PolarsError::NoData(err) => NoDataError::new_err(err.to_string()),
PolarsError::OutOfBounds(err) => OutOfBoundsError::new_err(err.to_string()),
PolarsError::SchemaFieldNotFound(name) => {
SchemaFieldNotFoundError::new_err(name.to_string())
},
Expand Down Expand Up @@ -75,6 +76,7 @@ create_exception!(exceptions, ComputeError, PyException);
create_exception!(exceptions, DuplicateError, PyException);
create_exception!(exceptions, InvalidOperationError, PyException);
create_exception!(exceptions, NoDataError, PyException);
create_exception!(exceptions, OutOfBoundsError, PyException);
create_exception!(exceptions, SchemaError, PyException);
create_exception!(exceptions, SchemaFieldNotFoundError, PyException);
create_exception!(exceptions, ShapeError, PyException);
Expand Down
5 changes: 4 additions & 1 deletion py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ use crate::conversion::Wrap;
use crate::dataframe::PyDataFrame;
use crate::error::{
ArrowErrorException, ColumnNotFoundError, ComputeError, DuplicateError, InvalidOperationError,
NoDataError, PyPolarsErr, SchemaError, SchemaFieldNotFoundError, StructFieldNotFoundError,
NoDataError, OutOfBoundsError, PyPolarsErr, SchemaError, SchemaFieldNotFoundError,
StructFieldNotFoundError,
};
use crate::expr::PyExpr;
use crate::lazyframe::PyLazyFrame;
Expand Down Expand Up @@ -243,6 +244,8 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> {
)
.unwrap();
m.add("NoDataError", py.get_type::<NoDataError>()).unwrap();
m.add("OutOfBoundsError", py.get_type::<OutOfBoundsError>())
.unwrap();
m.add("PolarsPanicError", py.get_type::<PanicException>())
.unwrap();
m.add("SchemaError", py.get_type::<SchemaError>()).unwrap();
Expand Down
35 changes: 29 additions & 6 deletions py-polars/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use polars_algo::hist;
use polars_core::series::IsSorted;
use polars_core::utils::flatten::flatten_series;
use polars_core::with_match_physical_numeric_polars_type;
use pyo3::exceptions::{PyRuntimeError, PyValueError};
use pyo3::exceptions::{PyIndexError, PyRuntimeError, PyValueError};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::Python;
Expand Down Expand Up @@ -154,20 +154,43 @@ impl PySeries {
}
}

fn get_idx(&self, py: Python, idx: usize) -> PyResult<PyObject> {
let av = self.series.get(idx).map_err(PyPolarsErr::from)?;
fn get_index(&self, py: Python, index: usize) -> PyResult<PyObject> {
let av = match self.series.get(index) {
Ok(v) => v,
Err(PolarsError::OutOfBounds(err)) => {
return Err(PyIndexError::new_err(err.to_string()))
},
Err(e) => return Err(PyPolarsErr::from(e).into()),
};

if let AnyValue::List(s) = av {
let pyseries = PySeries::new(s);
let out = POLARS
.getattr(py, "wrap_s")
.unwrap()
.call1(py, (pyseries,))
.unwrap();
return Ok(out.into_py(py));
}

Ok(Wrap(av).into_py(py))
}

Ok(out.into_py(py))
/// Get index but allow negative indices
fn get_index_signed(&self, py: Python, index: i64) -> PyResult<PyObject> {
let index = if index < 0 {
match self.len().checked_sub(index.unsigned_abs() as usize) {
Some(v) => v,
None => {
return Err(PyIndexError::new_err(
polars_err!(oob = index, self.len()).to_string(),
));
},
}
} else {
Ok(Wrap(self.series.get(idx).map_err(PyPolarsErr::from)?).into_py(py))
}
index as usize
};
self.get_index(py, index)
}

fn bitand(&self, other: &PySeries) -> PyResult<Self> {
Expand Down
25 changes: 0 additions & 25 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -3402,31 +3402,6 @@ def test_glimpse(capsys: Any) -> None:
assert result == expected


def test_item() -> None:
df = pl.DataFrame({"a": [1]})
assert df.item() == 1

df = pl.DataFrame({"a": [1, 2]})
with pytest.raises(ValueError, match=r".* frame has shape \(2, 1\)"):
df.item()

assert df.item(0, 0) == 1
assert df.item(1, "a") == 2

df = pl.DataFrame({"a": [1], "b": [2]})
with pytest.raises(ValueError, match=r".* frame has shape \(1, 2\)"):
df.item()

assert df.item(0, "a") == 1
assert df.item(0, "b") == 2

df = pl.DataFrame({})
with pytest.raises(ValueError, match=r".* frame has shape \(0, 0\)"):
df.item()
with pytest.raises(IndexError, match="column index 10 is out of bounds"):
df.item(0, 10)


@pytest.mark.parametrize(
("subset", "keep", "expected_mask"),
[
Expand Down
Loading

0 comments on commit 4eeaa00

Please sign in to comment.