
Commit

docs: Fix various instances of repeated words in docs and comments (#…
alexander-beedie authored Oct 29, 2024
1 parent 7a23e07 commit abe5139
Showing 30 changed files with 34 additions and 33 deletions.
2 changes: 1 addition & 1 deletion crates/polars-arrow/src/array/mod.rs
@@ -189,7 +189,7 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
new
}

-/// Clones this [`Array`] with a new new assigned bitmap.
+/// Clones this [`Array`] with a new assigned bitmap.
/// # Panic
/// This function panics iff `validity.len() != self.len()`.
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>;
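The `with_validity` contract above is easier to see in use. A minimal sketch, assuming `polars-arrow`'s `Int32Array` alias and `Bitmap` type (not part of this diff):

```rust
use polars_arrow::array::{Array, Int32Array};
use polars_arrow::bitmap::Bitmap;

fn main() {
    // Three values, all valid to start with.
    let arr = Int32Array::from_slice([1, 2, 3]);

    // Assign a fresh validity bitmap: the middle value becomes null.
    // Per the doc comment, this panics iff bitmap length != array length.
    let validity = Bitmap::from_iter([true, false, true]);
    let masked: Box<dyn Array> = arr.with_validity(Some(validity));
    assert_eq!(masked.null_count(), 1);
}
```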
2 changes: 1 addition & 1 deletion crates/polars-arrow/src/compute/cast/binary_to.rs
@@ -199,7 +199,7 @@ pub fn fixed_size_binary_to_binview(from: &FixedSizeBinaryArray) -> BinaryViewAr
// This is NOT equal to MAX_BYTES_PER_BUFFER because of integer division
let split_point = num_elements_per_buffer * size;

-// This is zero-copy for the buffer since split just increases the the data since
+// This is zero-copy for the buffer since split just increases the data since
let mut buffer = from.values().clone();
let mut buffers = Vec::with_capacity(num_buffers);
for _ in 0..num_buffers - 1 {
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/list/iterator.rs
@@ -152,7 +152,7 @@ impl ListChunked {
let (s, ptr) =
unsafe { unstable_series_container_and_ptr(name, inner_values.clone(), &iter_dtype) };

-// SAFETY: ptr belongs the the Series..
+// SAFETY: ptr belongs the Series..
unsafe {
AmortizedListIter::new(
self.len(),
2 changes: 1 addition & 1 deletion crates/polars-core/src/frame/row/mod.rs
@@ -206,7 +206,7 @@ pub fn rows_to_schema_first_non_null(
.iter_values()
.enumerate()
.filter_map(|(i, dtype)| {
-// double check struct and list types types
+// double check struct and list types
// nested null values can be wrongly inferred by front ends
match dtype {
DataType::Null | DataType::List(_) => Some(i),
2 changes: 1 addition & 1 deletion crates/polars-expr/src/expressions/ternary.rs
@@ -230,7 +230,7 @@ impl PhysicalExpr for TernaryExpr {
// * `zip_with` can be called directly with the series
// * mix of unit literals and AggregatedList
// * `zip_with` can be called with the flat values after the offsets
-//   have been been checked for alignment
+//   have been checked for alignment
let ac_target = non_literal_acs.first().unwrap();

let agg_state_out = match ac_target.agg_state() {
2 changes: 1 addition & 1 deletion crates/polars-expr/src/expressions/window.rs
@@ -754,7 +754,7 @@ where
unsafe { values.set_len(len) }
ChunkedArray::new_vec(ca.name().clone(), values).into_series()
} else {
-// We don't use a mutable bitmap as bits will have have race conditions!
+// We don't use a mutable bitmap as bits will have race conditions!
// A single byte might alias if we write from single threads.
let mut validity: Vec<bool> = vec![false; len];
let validity_ptr = validity.as_mut_ptr();
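For context on that comment: in a packed bitmap eight neighbouring values share one byte, so two threads setting adjacent bits both read-modify-write the same byte, a data race. One `bool` per value gives each index its own byte. A toy illustration, not the window code itself:

```rust
fn main() {
    let mut validity: Vec<bool> = vec![false; 8];
    let addr = validity.as_mut_ptr() as usize;

    // Each thread writes a distinct index; because every element is its
    // own byte, these writes cannot alias. Packed bits would share bytes.
    std::thread::scope(|s| {
        for i in 0..4 {
            s.spawn(move || unsafe { *(addr as *mut bool).add(i) = true });
        }
    });
    assert_eq!(validity.iter().filter(|v| **v).count(), 4);
}
```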
2 changes: 1 addition & 1 deletion crates/polars-ffi/src/version_0.rs
@@ -132,7 +132,7 @@ impl CallerContext {
self.bitflags |= 1 << k
}

-/// Parallelism is done by polars' main engine, the plugin should not run run its own parallelism.
+/// Parallelism is done by polars' main engine, the plugin should not run its own parallelism.
/// If this is `false`, the plugin could use parallelism without (much) contention with polars
/// parallelism strategies.
pub fn parallel(&self) -> bool {
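A hedged sketch of how a plugin might honour the `parallel` flag documented above; the `CallerContext` type is the one shown in this file, while the rayon fallback is purely illustrative:

```rust
use polars_ffi::version_0::CallerContext;
use rayon::prelude::*;

fn sum_values(values: &[f64], context: &CallerContext) -> f64 {
    if context.parallel() {
        // Polars' engine is already parallelising this call:
        // stay single-threaded to avoid contending with its scheduler.
        values.iter().sum()
    } else {
        // The engine is not parallel here; our own parallelism is fine.
        values.par_iter().sum()
    }
}
```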
2 changes: 1 addition & 1 deletion crates/polars-io/src/csv/write/write_impl/serializer.rs
@@ -689,7 +689,7 @@ pub(super) fn serializer_for<'a>(
quote_if_always!(decimal_serializer, scale.unwrap_or(0))
},
_ => {
-polars_bail!(ComputeError: "datatype {dtype} cannot be written to CSV\n\nConsider using JSON or or a binary format.")
+polars_bail!(ComputeError: "datatype {dtype} cannot be written to CSV\n\nConsider using JSON or a binary format.")
},
};
Ok(serializer)
2 changes: 1 addition & 1 deletion crates/polars-io/src/ipc/ipc_file.rs
@@ -1,6 +1,6 @@
//! # (De)serializing Arrows IPC format.
//!
-//! Arrow IPC is a [binary format format](https://arrow.apache.org/docs/python/ipc.html).
+//! Arrow IPC is a [binary format](https://arrow.apache.org/docs/python/ipc.html).
//! It is the recommended way to serialize and deserialize Polars DataFrames as this is most true
//! to the data schema.
//!
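A minimal round-trip through the format described above, assuming the `polars` facade crate with its `ipc` feature enabled (`IpcWriter`/`IpcReader` are the reader/writer pair this module backs):

```rust
use polars::prelude::*;
use std::fs::File;

fn roundtrip(df: &mut DataFrame) -> PolarsResult<DataFrame> {
    // Serialize: Arrow IPC preserves the schema exactly.
    let mut file = File::create("frame.ipc")?;
    IpcWriter::new(&mut file).finish(df)?;

    // Deserialize.
    let file = File::open("frame.ipc")?;
    IpcReader::new(file).finish()
}
```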
2 changes: 1 addition & 1 deletion crates/polars-io/src/ipc/ipc_stream.rs
@@ -1,6 +1,6 @@
//! # (De)serializing Arrows Streaming IPC format.
//!
-//! Arrow Streaming IPC is a [binary format format](https://arrow.apache.org/docs/python/ipc.html).
+//! Arrow Streaming IPC is a [binary format](https://arrow.apache.org/docs/python/ipc.html).
//! It used for sending an arbitrary length sequence of record batches.
//! The format must be processed from start to end, and does not support random access.
//! It is different than IPC, if you can't deserialize a file with `IpcReader::new`, it's probably an IPC Stream File.
2 changes: 1 addition & 1 deletion crates/polars-io/src/parquet/read/read_impl.rs
@@ -43,7 +43,7 @@ fn assert_dtypes(dtype: &ArrowDataType) {
// These should all be casted to the BinaryView / Utf8View variants
D::Utf8 | D::Binary | D::LargeUtf8 | D::LargeBinary => unreachable!(),

-// These should be casted to to Float32
+// These should be casted to Float32
D::Float16 => unreachable!(),

// This should have been converted to a LargeList
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/frame/mod.rs
@@ -1349,7 +1349,7 @@ impl LazyFrame {
right_on: E,
args: JoinArgs,
) -> LazyFrame {
-// if any of the nodes reads from files we must activate this this plan as well.
+// if any of the nodes reads from files we must activate this plan as well.
if other.opt_state.contains(OptFlags::FILE_CACHING) {
self.opt_state |= OptFlags::FILE_CACHING;
}
@@ -144,7 +144,7 @@ fn estimate_unique_count(keys: &[Column], mut sample_size: usize) -> PolarsResul

if keys.len() == 1 {
// we sample as that will work also with sorted data.
-// not that sampling without replacement is very very expensive. don't do that.
+// not that sampling without replacement is *very* expensive. don't do that.
let s = keys[0].sample_n(sample_size, true, false, None).unwrap();
// fast multi-threaded way to get unique.
let groups = s.as_materialized_series().group_tuples(true, false)?;
2 changes: 1 addition & 1 deletion crates/polars-ops/src/frame/join/general.rs
@@ -56,7 +56,7 @@ pub fn _coalesce_full_join(
df_left: &DataFrame,
) -> DataFrame {
// No need to allocate the schema because we already
-// know for certain that the column name for left left is `name`
+// know for certain that the column name for left is `name`
// and for right is `name + suffix`
let schema_left = if keys_left == keys_right {
Schema::default()
8 changes: 4 additions & 4 deletions crates/polars-ops/src/frame/pivot/positioning.rs
@@ -240,13 +240,13 @@ pub(super) fn compute_col_idx(
let col_locations = match column_agg_physical.dtype() {
T::Int32 | T::UInt32 => {
let Some(BitRepr::Small(ca)) = column_agg_physical.bit_repr() else {
-polars_bail!(ComputeError: "Expected 32-bit bit representation to be available. This should never happen");
+polars_bail!(ComputeError: "Expected 32-bit representation to be available; this should never happen");
};
compute_col_idx_numeric(&ca)
},
T::Int64 | T::UInt64 => {
let Some(BitRepr::Large(ca)) = column_agg_physical.bit_repr() else {
-polars_bail!(ComputeError: "Expected 64-bit bit representation to be available. This should never happen");
+polars_bail!(ComputeError: "Expected 64-bit representation to be available; this should never happen");
};
compute_col_idx_numeric(&ca)
},
@@ -413,13 +413,13 @@ pub(super) fn compute_row_idx(
match index_agg_physical.dtype() {
T::Int32 | T::UInt32 => {
let Some(BitRepr::Small(ca)) = index_agg_physical.bit_repr() else {
-polars_bail!(ComputeError: "Expected 32-bit bit representation to be available. This should never happen");
+polars_bail!(ComputeError: "Expected 32-bit representation to be available; this should never happen");
};
compute_row_index(index, &ca, count, index_s.dtype())
},
T::Int64 | T::UInt64 => {
let Some(BitRepr::Large(ca)) = index_agg_physical.bit_repr() else {
-polars_bail!(ComputeError: "Expected 64-bit bit representation to be available. This should never happen");
+polars_bail!(ComputeError: "Expected 64-bit representation to be available; this should never happen");
};
compute_row_index(index, &ca, count, index_s.dtype())
},
2 changes: 1 addition & 1 deletion crates/polars-parquet/src/arrow/read/schema/mod.rs
@@ -40,7 +40,7 @@ impl Default for SchemaInferenceOptions {
///
/// # Error
/// This function errors iff the key `"ARROW:schema"` exists but is not correctly encoded,
-/// indicating that that the file's arrow metadata was incorrectly written.
+/// indicating that the file's arrow metadata was incorrectly written.
pub fn infer_schema(file_metadata: &FileMetadata) -> PolarsResult<ArrowSchema> {
infer_schema_with_options(file_metadata, &None)
}
2 changes: 1 addition & 1 deletion crates/polars-parquet/src/parquet/read/page/reader.rs
@@ -97,7 +97,7 @@ impl PageReader {
Self::new_with_page_meta(reader, column.into(), scratch, max_page_size)
}

-/// Create a a new [`PageReader`] with [`PageMetaData`].
+/// Create a new [`PageReader`] with [`PageMetaData`].
///
/// It assumes that the reader has been `sought` (`seek`) to the beginning of `column`.
pub fn new_with_page_meta(
@@ -8,7 +8,7 @@ use crate::pipeline::{morsels_per_sink, FORCE_OOC};
pub(super) struct OocState {
// OOC
// Stores available memory in the system at the start of this sink.
-// and stores the memory used by this this sink.
+// and stores the memory used by this sink.
mem_track: MemTracker,
// sort in-memory or out-of-core
pub(super) ooc: bool,
@@ -13,7 +13,7 @@ use crate::pipeline::morsels_per_sink;
pub(super) struct OocState {
// OOC
// Stores available memory in the system at the start of this sink.
-// and stores the memory used by this this sink.
+// and stores the memory used by this sink.
_mem_track: MemTracker,
// sort in-memory or out-of-core
pub(super) ooc: bool,
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-pipe/src/executors/sinks/joins/cross.rs
@@ -111,7 +111,7 @@ impl Operator for CrossJoinProbe {
_context: &PExecutionContext,
chunk: &DataChunk,
) -> PolarsResult<OperatorResult> {
-// Expected output is size**2, so this needs to be a a small number.
+// Expected output is size**2, so this needs to be a small number.
// However, if one of the DataFrames is much smaller than 250, we want
// to take rather more from the other DataFrame so we don't end up with
// overly small chunks.
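To make that sizing comment concrete: probing `a` rows against `b` rows emits `a * b` rows, so the chunk taken from one side must shrink as the other side grows. A hypothetical version of such a heuristic; the operator's real formula may differ:

```rust
// Aim for roughly 250 * 250 = 62_500 output rows per probe chunk.
fn probe_chunk_size(other_df_height: usize) -> usize {
    const TARGET: usize = 250;
    // When the other side is much smaller than 250, take more rows from
    // this side so the output chunks don't become overly small.
    (TARGET * TARGET).div_ceil(other_df_height.max(1))
}

fn main() {
    assert_eq!(probe_chunk_size(250), 250);  // balanced sides
    assert_eq!(probe_chunk_size(10), 6_250); // tiny other side -> take more
}
```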
2 changes: 1 addition & 1 deletion crates/polars-pipe/src/executors/sinks/sort/sink.rs
@@ -20,7 +20,7 @@ pub struct SortSink {
schema: SchemaRef,
chunks: Vec<DataFrame>,
// Stores available memory in the system at the start of this sink.
-// and stores the memory used by this this sink.
+// and stores the memory used by this sink.
mem_track: MemTracker,
// sort in-memory or out-of-core
ooc: bool,
2 changes: 1 addition & 1 deletion crates/polars-pipe/src/operators/chunks.rs
@@ -39,7 +39,7 @@ pub(crate) fn chunks_to_df_unchecked(chunks: Vec<DataChunk>) -> DataFrame {
///
/// The benefit of having a series of `DataFrame` that are e.g. 4MB each that
/// are then made contiguous is that you're not using a lot of memory (an extra
-/// 4MB), but you're still doing better than if you had a series of of 2KB
+/// 4MB), but you're still doing better than if you had a series of 2KB
/// `DataFrame`s.
///
/// Changing the `DataFrame` into contiguous chunks is the caller's
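A sketch of the buffering strategy this comment describes, assuming `polars_core`'s `accumulate_dataframes_vertical` helper and `DataFrame::estimated_size`; the pipe engine's actual sink differs in detail:

```rust
use polars_core::prelude::*;
use polars_core::utils::accumulate_dataframes_vertical;

/// Buffer small frames until ~4 MB, then emit one concatenated frame.
#[derive(Default)]
struct Accumulator {
    buffered: Vec<DataFrame>,
    bytes: usize,
}

impl Accumulator {
    fn push(&mut self, df: DataFrame) -> PolarsResult<Option<DataFrame>> {
        self.bytes += df.estimated_size();
        self.buffered.push(df);
        if self.bytes < 4 * 1024 * 1024 {
            // Keep buffering: making every tiny frame contiguous would
            // cost far more copies than one pass over ~4 MB.
            return Ok(None);
        }
        self.bytes = 0;
        let out = accumulate_dataframes_vertical(self.buffered.drain(..))?;
        Ok(Some(out))
    }
}
```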
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/functions/syntactic_sugar.rs
@@ -55,7 +55,7 @@ pub fn is_not_null(expr: Expr) -> Expr {
/// Casts the column given by `Expr` to a different type.
///
/// Follows the rules of Rust casting, with the exception that integers and floats can be cast to `DataType::Date` and
-/// `DataType::DateTime(_, _)`. A column consisting entirely of of `Null` can be cast to any type, regardless of the
+/// `DataType::DateTime(_, _)`. A column consisting entirely of `Null` can be cast to any type, regardless of the
/// nominal type of the column.
pub fn cast(expr: Expr, dtype: DataType) -> Expr {
Expr::Cast {
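The casting rules in that doc comment, exercised through the `polars` facade with the equivalent `Expr::cast` method (a sketch assuming the `lazy` and `temporal` features):

```rust
use polars::prelude::*;

fn main() -> PolarsResult<()> {
    // Integers cast to Date are interpreted as days since the epoch.
    let df = df!["days" => [0i32, 1, 2]]?;
    let out = df
        .lazy()
        .with_column(col("days").cast(DataType::Date).alias("date"))
        .collect()?;
    println!("{out}");

    // A column consisting entirely of nulls casts to any type.
    let nulls = Series::new_null("n".into(), 3);
    let _as_dates = nulls.cast(&DataType::Date)?;
    Ok(())
}
```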
@@ -141,7 +141,7 @@ pub fn optimize(root: Node, lp_arena: &mut Arena<IR>, expr_arena: &Arena<AExpr>)
// @NOTE: Pruning of re-assigned columns
//
// We checked if this expression output is also assigned by the input and
-// that that assignment is not used in the current WITH_COLUMNS.
+// that this assignment is not used in the current WITH_COLUMNS.
// Consequently, we are free to prune the input's assignment to the output.
//
// We immediately prune here to simplify the later code.
4 changes: 2 additions & 2 deletions crates/polars-plan/src/plans/optimizer/simplify_expr/mod.rs
@@ -152,7 +152,7 @@ impl OptimizationRule for SimplifyBooleanRule {
AExpr::Literal(LiteralValue::Boolean(true))
) && in_filter =>
{
-// Only in filter as we we might change the name from "literal"
+// Only in filter as we might change the name from "literal"
// to whatever lhs columns is.
return Ok(Some(expr_arena.get(*right).clone()));
},
@@ -210,7 +210,7 @@ impl OptimizationRule for SimplifyBooleanRule {
AExpr::Literal(LiteralValue::Boolean(false))
) && in_filter =>
{
-// Only in filter as we we might change the name from "literal"
+// Only in filter as we might change the name from "literal"
// to whatever lhs columns is.
return Ok(Some(expr_arena.get(*right).clone()));
},
2 changes: 1 addition & 1 deletion crates/polars-python/src/file.rs
@@ -25,7 +25,7 @@ pub struct PyFileLikeObject {
/// Wraps a `PyObject`, and implements read, seek, and write for it.
impl PyFileLikeObject {
/// Creates an instance of a `PyFileLikeObject` from a `PyObject`.
-/// To assert the object has the required methods methods,
+/// To assert the object has the required methods,
/// instantiate it with `PyFileLikeObject::require`
pub fn new(object: PyObject) -> Self {
PyFileLikeObject { inner: object }
2 changes: 1 addition & 1 deletion crates/polars-utils/src/vec.rs
@@ -20,7 +20,7 @@ impl<T> IntoRawParts<T> for Vec<T> {
}
}

-/// Fill current allocation if if > 0
+/// Fill current allocation if > 0
/// otherwise realloc
pub trait ResizeFaster<T: Copy> {
fn fill_or_alloc(&mut self, new_len: usize, value: T);
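A free-standing sketch of what `fill_or_alloc` promises per the doc comment above; illustrative only, the trait's real implementation lives in this file:

```rust
// If the vec already owns an allocation, reuse it and fill in place;
// otherwise allocate once with the requested value.
fn fill_or_alloc<T: Copy>(vec: &mut Vec<T>, new_len: usize, value: T) {
    if vec.capacity() == 0 {
        *vec = vec![value; new_len];
    } else {
        vec.clear();
        vec.resize(new_len, value);
    }
}

fn main() {
    let mut v: Vec<u8> = Vec::with_capacity(16);
    fill_or_alloc(&mut v, 8, 0xFF); // fills the existing allocation
    assert_eq!(v, vec![0xFF; 8]);
}
```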
1 change: 1 addition & 0 deletions py-polars/polars/convert/general.py
@@ -738,6 +738,7 @@ def _from_dataframe_repr(m: re.Match[str]) -> DataFrame:
if schema and data and (n_extend_cols := (len(schema) - len(data))) > 0:
empty_data = [None] * len(data[0])
data.extend((pl.Series(empty_data, dtype=String)) for _ in range(n_extend_cols))
+
for dtype in set(schema.values()):
if dtype in (List, Struct, Object):
msg = (
2 changes: 1 addition & 1 deletion py-polars/polars/dataframe/frame.py
@@ -1980,7 +1980,7 @@ def to_jax(
Create the Array on a specific GPU device:
->>> gpu_device = jax.devices("gpu")[1]) # doctest: +SKIP
+>>> gpu_device = jax.devices("gpu")[1] # doctest: +SKIP
>>> a = df.to_jax(device=gpu_device) # doctest: +SKIP
>>> a.device() # doctest: +SKIP
GpuDevice(id=1, process_index=0)
2 changes: 1 addition & 1 deletion py-polars/tests/unit/series/test_scatter.py
@@ -43,7 +43,7 @@ def test_scatter() -> None:
assert s.to_list() == ["a", "x", "x"]
assert s.scatter([0, 2], 0.12345).to_list() == ["0.12345", "x", "0.12345"]

-# set multiple values values
+# set multiple values
s = pl.Series(["z", "z", "z"])
assert s.scatter([0, 1], ["a", "b"]).to_list() == ["a", "b", "z"]
s = pl.Series([True, False, True])
