Skip to content

Commit

Permalink
Merge pull request #57 from Merck/ext-binary
Browse files Browse the repository at this point in the history
Expand binary file extensions dictionary
  • Loading branch information
nanxstats authored Nov 9, 2024
2 parents 3667507 + de646d2 commit 9022573
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 9 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: pkglite
Title: Compact Package Representations
Version: 0.2.3.9000
Version: 0.2.3.9001
Authors@R: c(
person("Nan", "Xiao", email = "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-0250-5673")),
Expand Down
4 changes: 3 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# pkglite 0.2.3.9000
# pkglite 0.2.3.9001

## Enhancements

- Increase the default file specification coverage for `file_src()`.
This helps identify the correct file collections for R packages
with `src/Makevars` or `src/Makefile`, for example, packages that
interface with Stan via the rstan package (#56).
- Expand the binary file extension dictionary to cover files
frequently used in machine learning frameworks (#57).

# pkglite 0.2.3

Expand Down
5 changes: 3 additions & 2 deletions R/dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,9 @@ ext_binary <- function(flat = FALSE) {
"audio" = c("wav", "mp3", "mid", "ogg", "au", "m4a"),
"video" = c("mp4", "avi", "mov", "mkv", "webm"),
"generic" = c(
"bin", "epub", "h5", "hdf5", "onnx",
"parquet", "feather", "pkl", "npy"
"bin", "epub", "hdf5", "h5", "parquet", "feather", "msgpack",
"pickle", "pkl", "npy", "npz", "safetensors",
"pt", "pth", "keras", "tfrecord", "pb", "ckpt", "onnx"
)
)
if (flat) unique(unlist(x)) else x
Expand Down
5 changes: 3 additions & 2 deletions tests/testthat/test-independent-test_dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ test_that("Test ext_binary() generate the right string list", {
"audio" = c("wav", "mp3", "mid", "ogg", "au", "m4a"),
"video" = c("mp4", "avi", "mov", "mkv", "webm"),
"generic" = c(
"bin", "epub", "h5", "hdf5", "onnx",
"parquet", "feather", "pkl", "npy"
"bin", "epub", "hdf5", "h5", "parquet", "feather", "msgpack",
"pickle", "pkl", "npy", "npz", "safetensors",
"pt", "pth", "keras", "tfrecord", "pb", "ckpt", "onnx"
)
)

Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-independent-test_templates.R
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ test_that("file_vignettes() creates the correct 'file_spec' objects", {
(!is_spec_binary & is_file_spec_type(
fs_source = fs,
path = "vignettes/",
pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.h5$|\\.hdf5$|\\.onnx$|\\.parquet$|\\.feather$|\\.pkl$|\\.npy$",
pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.hdf5$|\\.h5$|\\.parquet$|\\.feather$|\\.msgpack$|\\.pickle$|\\.pkl$|\\.npy$|\\.npz$|\\.safetensors$|\\.pt$|\\.pth$|\\.keras$|\\.tfrecord$|\\.pb$|\\.ckpt$|\\.onnx$",
format = "binary",
recursive = TRUE,
ignore_case = TRUE,
Expand Down Expand Up @@ -332,7 +332,7 @@ test_that("file_default() creates the correct 'file_spec' objects", {
(!is_spec_binary & is_file_spec_type(
fs_source = fs,
path = "vignettes/",
pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.h5$|\\.hdf5$|\\.onnx$|\\.parquet$|\\.feather$|\\.pkl$|\\.npy$",
pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.hdf5$|\\.h5$|\\.parquet$|\\.feather$|\\.msgpack$|\\.pickle$|\\.pkl$|\\.npy$|\\.npz$|\\.safetensors$|\\.pt$|\\.pth$|\\.keras$|\\.tfrecord$|\\.pb$|\\.ckpt$|\\.onnx$",
format = "binary",
recursive = TRUE,
ignore_case = TRUE,
Expand Down Expand Up @@ -534,7 +534,7 @@ test_that("file_auto() creates the correct 'file_spec' objects", {
(!is_spec_binary & is_file_spec_type(
fs_source = fs,
path = "inst/",
pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.h5$|\\.hdf5$|\\.onnx$|\\.parquet$|\\.feather$|\\.pkl$|\\.npy$",
pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.hdf5$|\\.h5$|\\.parquet$|\\.feather$|\\.msgpack$|\\.pickle$|\\.pkl$|\\.npy$|\\.npz$|\\.safetensors$|\\.pt$|\\.pth$|\\.keras$|\\.tfrecord$|\\.pb$|\\.ckpt$|\\.onnx$",
format = "binary",
recursive = TRUE,
ignore_case = TRUE,
Expand Down

0 comments on commit 9022573

Please sign in to comment.