diff --git a/crates/polars-plan/src/plans/optimizer/projection_pushdown/mod.rs b/crates/polars-plan/src/plans/optimizer/projection_pushdown/mod.rs
index 7a84c6990fda..55d5501dd44e 100644
--- a/crates/polars-plan/src/plans/optimizer/projection_pushdown/mod.rs
+++ b/crates/polars-plan/src/plans/optimizer/projection_pushdown/mod.rs
@@ -511,6 +511,23 @@ impl ProjectionPushDown {
                         file_options.row_index = None;
                     }
                 };
+
+                // Stop producing the file-path column when a projected output
+                // schema exists and does not contain it.
+                if let Some(col_name) = &file_options.include_file_paths {
+                    if output_schema
+                        .as_ref()
+                        .is_some_and(|schema| !schema.contains(col_name))
+                    {
+                        // Need to remove it from the input schema so
+                        // that projection indices are correct.
+                        let mut file_schema = Arc::unwrap_or_clone(file_info.schema);
+                        file_schema.shift_remove(col_name);
+                        file_info.schema = Arc::new(file_schema);
+                        file_options.include_file_paths = None;
+                    }
+                };
+
                 let lp = Scan {
                     sources,
                     file_info,
diff --git a/py-polars/tests/unit/io/test_scan.py b/py-polars/tests/unit/io/test_scan.py
index 799c4953cbf6..4977fa115749 100644
--- a/py-polars/tests/unit/io/test_scan.py
+++ b/py-polars/tests/unit/io/test_scan.py
@@ -801,3 +801,12 @@ def test_scan_double_collect_row_index_invalidates_cached_ir_18892() -> None:
             schema={"index": pl.UInt32, "a": pl.Int64},
         ),
     )
+
+
+def test_scan_include_file_paths_respects_projection_pushdown() -> None:
+    # "path_name" is requested on the scan but not selected afterwards.
+    q = pl.scan_csv(b"a,b,c\na1,b1,c1", include_file_paths="path_name").select(
+        ["a", "b"]
+    )
+
+    assert_frame_equal(q.collect(), pl.DataFrame({"a": "a1", "b": "b1"}))