@@ -228,13 +228,13 @@ pub struct ListingOptions {
228
228
impl ListingOptions {
229
229
/// Creates an options instance with the given format
230
230
/// Default values:
231
- /// - no file extension filter
231
+ /// - file extension filter taken from the given format (`format.get_ext()`)
232
232
/// - no input partition to discover
233
233
/// - one target partition
234
234
/// - stat collection
235
235
pub fn new ( format : Arc < dyn FileFormat > ) -> Self {
236
236
Self {
237
- file_extension : String :: new ( ) ,
237
+ file_extension : format . get_ext ( ) ,
238
238
format,
239
239
table_partition_cols : vec ! [ ] ,
240
240
collect_stat : true ,
@@ -1314,6 +1314,7 @@ mod tests {
1314
1314
"test:///bucket/key-prefix/" ,
1315
1315
12 ,
1316
1316
5 ,
1317
+ Some ( "" ) ,
1317
1318
)
1318
1319
. await ?;
1319
1320
@@ -1328,6 +1329,7 @@ mod tests {
1328
1329
"test:///bucket/key-prefix/" ,
1329
1330
4 ,
1330
1331
4 ,
1332
+ Some ( "" ) ,
1331
1333
)
1332
1334
. await ?;
1333
1335
@@ -1343,12 +1345,19 @@ mod tests {
1343
1345
"test:///bucket/key-prefix/" ,
1344
1346
2 ,
1345
1347
2 ,
1348
+ Some ( "" ) ,
1346
1349
)
1347
1350
. await ?;
1348
1351
1349
1352
// no files => no groups
1350
- assert_list_files_for_scan_grouping ( & [ ] , "test:///bucket/key-prefix/" , 2 , 0 )
1351
- . await ?;
1353
+ assert_list_files_for_scan_grouping (
1354
+ & [ ] ,
1355
+ "test:///bucket/key-prefix/" ,
1356
+ 2 ,
1357
+ 0 ,
1358
+ Some ( "" ) ,
1359
+ )
1360
+ . await ?;
1352
1361
1353
1362
// files that don't match the prefix
1354
1363
assert_list_files_for_scan_grouping (
@@ -1360,6 +1369,21 @@ mod tests {
1360
1369
"test:///bucket/key-prefix/" ,
1361
1370
10 ,
1362
1371
2 ,
1372
+ Some ( "" ) ,
1373
+ )
1374
+ . await ?;
1375
+
1376
+ // files that don't match the prefix or the default file extension
1377
+ assert_list_files_for_scan_grouping (
1378
+ & [
1379
+ "bucket/key-prefix/file0.avro" ,
1380
+ "bucket/key-prefix/file1.parquet" ,
1381
+ "bucket/other-prefix/roguefile.avro" ,
1382
+ ] ,
1383
+ "test:///bucket/key-prefix/" ,
1384
+ 10 ,
1385
+ 1 ,
1386
+ None ,
1363
1387
)
1364
1388
. await ?;
1365
1389
Ok ( ( ) )
@@ -1380,6 +1404,7 @@ mod tests {
1380
1404
& [ "test:///bucket/key1/" , "test:///bucket/key2/" ] ,
1381
1405
12 ,
1382
1406
5 ,
1407
+ Some ( "" ) ,
1383
1408
)
1384
1409
. await ?;
1385
1410
@@ -1396,6 +1421,7 @@ mod tests {
1396
1421
& [ "test:///bucket/key1/" , "test:///bucket/key2/" ] ,
1397
1422
5 ,
1398
1423
5 ,
1424
+ Some ( "" ) ,
1399
1425
)
1400
1426
. await ?;
1401
1427
@@ -1412,11 +1438,13 @@ mod tests {
1412
1438
& [ "test:///bucket/key1/" ] ,
1413
1439
2 ,
1414
1440
2 ,
1441
+ Some ( "" ) ,
1415
1442
)
1416
1443
. await ?;
1417
1444
1418
1445
// no files => no groups
1419
- assert_list_files_for_multi_paths ( & [ ] , & [ "test:///bucket/key1/" ] , 2 , 0 ) . await ?;
1446
+ assert_list_files_for_multi_paths ( & [ ] , & [ "test:///bucket/key1/" ] , 2 , 0 , Some ( "" ) )
1447
+ . await ?;
1420
1448
1421
1449
// files that don't match the prefix
1422
1450
assert_list_files_for_multi_paths (
@@ -1431,6 +1459,24 @@ mod tests {
1431
1459
& [ "test:///bucket/key3/" ] ,
1432
1460
2 ,
1433
1461
1 ,
1462
+ Some ( "" ) ,
1463
+ )
1464
+ . await ?;
1465
+
1466
+ // files that don't match the prefix or the default file extension
1467
+ assert_list_files_for_multi_paths (
1468
+ & [
1469
+ "bucket/key1/file0.avro" ,
1470
+ "bucket/key1/file1.csv" ,
1471
+ "bucket/key1/file2.avro" ,
1472
+ "bucket/key2/file3.csv" ,
1473
+ "bucket/key2/file4.avro" ,
1474
+ "bucket/key3/file5.csv" ,
1475
+ ] ,
1476
+ & [ "test:///bucket/key1/" , "test:///bucket/key3/" ] ,
1477
+ 2 ,
1478
+ 2 ,
1479
+ None ,
1434
1480
)
1435
1481
. await ?;
1436
1482
Ok ( ( ) )
@@ -1458,14 +1504,15 @@ mod tests {
1458
1504
table_prefix : & str ,
1459
1505
target_partitions : usize ,
1460
1506
output_partitioning : usize ,
1507
+ file_ext : Option < & str > ,
1461
1508
) -> Result < ( ) > {
1462
1509
let ctx = SessionContext :: new ( ) ;
1463
1510
register_test_store ( & ctx, & files. iter ( ) . map ( |f| ( * f, 10 ) ) . collect :: < Vec < _ > > ( ) ) ;
1464
1511
1465
1512
let format = AvroFormat { } ;
1466
1513
1467
1514
let opt = ListingOptions :: new ( Arc :: new ( format) )
1468
- . with_file_extension ( "" )
1515
+ . with_file_extension_opt ( file_ext )
1469
1516
. with_target_partitions ( target_partitions) ;
1470
1517
1471
1518
let schema = Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Boolean , false ) ] ) ;
@@ -1491,14 +1538,15 @@ mod tests {
1491
1538
table_prefix : & [ & str ] ,
1492
1539
target_partitions : usize ,
1493
1540
output_partitioning : usize ,
1541
+ file_ext : Option < & str > ,
1494
1542
) -> Result < ( ) > {
1495
1543
let ctx = SessionContext :: new ( ) ;
1496
1544
register_test_store ( & ctx, & files. iter ( ) . map ( |f| ( * f, 10 ) ) . collect :: < Vec < _ > > ( ) ) ;
1497
1545
1498
1546
let format = AvroFormat { } ;
1499
1547
1500
1548
let opt = ListingOptions :: new ( Arc :: new ( format) )
1501
- . with_file_extension ( "" )
1549
+ . with_file_extension_opt ( file_ext )
1502
1550
. with_target_partitions ( target_partitions) ;
1503
1551
1504
1552
let schema = Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Boolean , false ) ] ) ;
0 commit comments