Skip to content

Latest commit

 

History

History
559 lines (481 loc) · 18.6 KB

papisr.src.org

File metadata and controls

559 lines (481 loc) · 18.6 KB

papisr - R workflow for tabulating papis records

frontpage

:export_options+: author:nil :export_options+: title:nil

papisr

Installation

## Installs and loads papisr
devtools::install_github("stasvlasov/papisr")
library("papisr")

Usage

Suppose you have a papis library located in testdata/papis that includes 4 simple bibliographic records (see below for the content of files). This example papis library is provided with the package and accessible with system.file()

Now, for example, if you want to tabulate year and url fields and number of tags for each record that is not tagged as ‘classics’ you can do it with the following simple script:

system.file("testdata", "papis", package = "papisr") |>
    papisr::collect_papis_records(!("classics" %in% info$tags)) |>
    papisr::tabulate_papis_records(`Year` = info$year
                                 , `URL` = info$url
                                 , `No. of tags` = length(info$tags))

The script returns the following data.frame:

YearURLNo. of tags
2022example.com2
1985uvt.nl2
22221

Here 3 out of 4 records were tabulated because the one records with tag “classics” was filtered out.

Test records in papis format

testdata/papis/a/info.yml

tags:
  - data
  - research
url: example.com
year: 2022

testdata/papis/b/info.yml

tags:
  - research
  - phd
url: uvt.nl
year: 1985

testdata/papis/c/INFO.YML

tags: data
year: 2222

testdata/papis/d/info.yaml

tags: classics
year: 2000

deploy

(save-excursion
  (let ((calls '(
                 "reset-working-directory"
                 "readme-header"
                 "tangle-readme"
                 "tangle-buffer"
                 "description-and-license"
                 "roxygenize"
                 "setup-tinytest"
                 ))
        ;; turn off babel prompts
        org-confirm-babel-evaluate)
    (mapcar
     (lambda (name)
       (save-excursion
         (org-babel-goto-named-src-block name)
         (or (org-babel-execute-src-block-maybe)
             (org-babel-lob-execute-maybe))))
     calls)))

reset working directory

## Remove tangled and generated files
## --------------------------------------------------------------------------------
dirs_to_remove <-c(
    "man"
  , "inst"
  , "tests"
  , "data"
  , "R"
)

files_to_remove <- c(
    "DESCRIPTION"
  , "NAMESPACE"
  , "LICENSE.md"
  , "LICENSE"
  , "README.md"
  , ".gitignore"
  , ".Rbuildignore"
)

dirs_to_remove |>
    lapply(list.files
         , recursive = TRUE
         , full.names = TRUE
         , include.dirs = TRUE) |>
    unlist() |>
    rev() |>
    c(dirs_to_remove, files_to_remove) |>
    sapply(\(file) if(file.exists(file)) file.remove(file))

tangle sources

(org-babel-tangle)

tinytest setup

tinytest::setup_tinytest(".")

github workflows and badges setup

https://github.com/r-lib/actions/blob/v1/examples/README.md

usethis::use_github_action("check-release")
usethis::use_github_action("test-coverage")
usethis::use_github_action("pkgdown")
usethis::use_github_actions_badge(name = "R-CMD-check")

.gitignore

*.project.org
.DS_Store

# History files
.Rhistory
.Rapp.history

# Session Data files
.RData

# User-specific files
.Ruserdata

# Example code in package build process
*-Ex.R

# Output files from R CMD build
/*.tar.gz

# Output files from R CMD check
/*.Rcheck/

# RStudio files
.Rproj.user/

# produced vignettes
vignettes/*.html
vignettes/*.pdf

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth

# knitr and R markdown default cache directories
*_cache/
/cache/

# Temporary files created by R markdown
*.utf8.md
*.knit.md

# R Environment Variables
.Renviron

.Rbuildignore

^LICENSE\.md$
^\.github$
^_pkgdown\.yml$
^docs$
^pkgdown$

README.md

(require 'org-goto)
(save-excursion
  (org-goto--local-search-headings "frontpage" nil t)
  ;; (org-pandoc-export-to-markdown nil 'subtreep)
  (org-md-export-to-markdown nil 'subtreep)
  )

Description and License

nameslink
tinytesthttps://github.com/markvanderloo/tinytest/blob/master/pkg/README.md
checkmatehttps://mllg.github.io/checkmate/
packagescurrent_versionensure_versionlink
Rnil4.0
yaml2.3.52.3.0R package for converting objects to and from YAML
packageVersion(pkg)
## set description
suppressWarnings(file.remove("DESCRIPTION"))
list(Title  = "<<package-title>>"
   , Description = "<<package-description>>"
   , Date = Sys.Date()
   , `Authors@R` = 'as.person("Stanislav Vlasov <[email protected]> [aut, cre]")'
   , Version = "0.0.0.9000"
   , URL = "https://stasvlasov.github.io/papisr/"
   , BugReports = "https://github.com/stasvlasov/papisr/issues"
   , Suggests = paste(suggest_packages$names, collapse = ", ")
   , References = 'Alejandro Gallo, Nicolò Balzarotti, Michael Plews, Alex Fikl, Jackson Woodruff, Matthieu Coudron, Alexander Von Moll, gouderm, Sébastien M. Popoff, Henrik Grimler, JP-Ellis, Katrin Leinweber, Manuel Haussmann, Andrew Ramsey, Andrey Akinshin, CosmosAtlas, dbruggner, hayk, Henning Timm, … prataffel. (2022). papis/papis: VERSION 0.12 (v0.12). Zenodo. https://doi.org/10.5281/zenodo.6573964'
   , Depends = paste(
         paste0(dependencies$packages, " (>= ", dependencies$ensure_version, ")")
       , collapse = ", ")) |>
    usethis::use_description()
## Set license
suppressWarnings(file.remove("LICENSE", "LICENSE.md"))
usethis::use_mit_license()
## usethis::use_lgpl_license()
## usethis::use_apache_license()  

Documentation

#' @details
#' <<package-title>>
#' 
#' <<package-description>>
#' @md
"_PACKAGE"
## Update name spaces and documentation for functions
roxygen2::roxygenize()

Install package

## Unload and uninstall package
## --------------------------------------------------------------------------------
detach(package:papisr, unload = TRUE)
remove.packages("papisr")

## Install package
## --------------------------------------------------------------------------------
devtools::install(".")

papisr functions

collect_papis_records

##' Collects papis records
##'
##' The collection is done by (1) looking for all subdirectories with info.yml file that defines papis record, (2) filtering those records and (3) returning lists of 'path' (root dir of papis record) and 'info' (content of info.yml) for each record
##' 
##' @param dir A character vector of directories to scan recursevely for papis records. 
##' @param filter_info Exprocion that allows to filter info.yml files that is evaluated in the environment with two variables bound for each record - 'path' (root dir of papis record) and 'info' (content of info.yml). The expression should return TRUE in order for record to be filtered in. Other returned value will filter the record out. Example: `'data' %in% info$tags` will filter only records that have tag 'data' in their info.yml descriptions
##' @return list of 'path' (root dir of papis record) and 'info' (content of info.yml) for each record
##' 
##' @md 
##' @export 
collect_papis_records <- function(dir, filter_info) {
    env <- parent.frame()
    sys_call <- sys.call()
    papis_info_yml_files <- 
        list.files(dir
                 , pattern = "^info\\.y[a]?ml$"
                 , full.names = TRUE
                 , recursive = TRUE
                 , ignore.case = TRUE)
    papis_records <-
        papis_info_yml_files |>
        lapply(\(info_yml_file)
               list(path = dirname(info_yml_file)
                  , info = yaml::read_yaml(info_yml_file)))
    ## filter info.yml files based on some filter criteria
    if(length(sys_call) == 3) {
        filter_info <- sys_call[[3]]
        papis_records_filter <-
            papis_records |>
            sapply(\(papis_record) {
                filter_info |>
                    eval(papis_record, env) |>
                    isTRUE()
            })
        return(papis_records[papis_records_filter])
    } else {
        return(papis_records)
    }
}

expect_equal(
  system.file("testdata", "papis", package = "papisr") |>
  collect_papis_records("data" %in% info$tags) |>
  lapply(`[[`, "info")
  ## remove paths as in test environment it is different
, list(list(tags = c("data", "research"), url = "example.com", 
    year = 2022L), list(tags = "data", year = 2222L)))

tabulate_papis_records

##' Tablulate papis records
##' 
##' @param papis_records List of papis records as returned by `collect_papis_records()`
##' @param ... Colums specification as named expressions that are evaluated in papis record environment where two variables are bound - `path` and `info` (see `collect_papis_records()` for details. The evaluation environment is enclosed in parent frame (aka `tabulate_papis_records` calling environment)
##' 
##' @param .omit_all_na_rows Whether to remove rows with all NAs.
##' @param .bind_dot_n_and_dot_dot Whethet to bind two extra variables to the evaluation environment for columns. Namely `.n` (current row/record's number) and `..` as the entire `papis_records` input.
##' @return Data frame. If some of the column values have length > 1 then the table will be filled with these values.
##' 
##' @md 
##' @export 
tabulate_papis_records <- function(papis_records, ...
                                 , .omit_all_na_rows = TRUE
                                 , .bind_dot_n_and_dot_dot = TRUE) {
    fun_call <- sys.call()
    col_names <- ...names()
    env <- parent.frame()
    if(.bind_dot_n_and_dot_dot) {
        papis_records <- 
            mapply(
                \(papis_record, n) {
                    c(papis_record, list(.n = n, .. = papis_records))
                }
              , papis_records
              , seq_along(papis_records)
              , SIMPLIFY = FALSE)
    }
    papis_table <-
        lapply(papis_records, \(papis_record) {
            lapply(col_names
                 , \(col_name) {
                     col_val <- 
                         fun_call[[col_name]] |>
                         eval(papis_record, enclos = env)
                     if(length(col_val) == 0) {
                         return(NA)
                     } else if(is.list(col_val) || length(col_val) > 1) {
                         col_val <- 
                             col_val |>
                             lapply(\(col_val_el) {
                                 if(length(col_val_el) == 0) NA else col_val_el
                             }) |>
                             unlist()
                         return(col_val)
                     } else {
                         return(col_val)
                     }
                 }) |>
                `names<-`(col_names) |>
                as.data.frame(stringsAsFactors = FALSE
                            , check.names = FALSE)
        })
    papis_table <- do.call(rbind, papis_table)
    if(.omit_all_na_rows) {
        papis_table <- papis_table[rowSums(is.na(papis_table)) != ncol(papis_table), ]
    }
    return(papis_table)
}
expect_equal(system.file("testdata", "papis", package = "papisr") |>
             collect_papis_records() |>
             tabulate_papis_records(year = info$year
                                  , url = info$url
                                  , tag = length(info$tags))
, structure(list(year = c(2022L, 1985L, 2222L, 2000L), url = c("example.com", 
"uvt.nl", NA, NA), tag = c(2L, 2L, 1L, 1L)), row.names = c(NA, 
-4L), class = "data.frame"))


## test filling the columns
expect_equal(
    system.file("testdata", "papis", package = "papisr") |>
             collect_papis_records() |>
             tabulate_papis_records(year = info$year
                                  , url = info$url
                                  , tag = info$tags)
, structure(list(year = c(2022L, 2022L, 1985L, 1985L, 2222L, 2000L
), url = c("example.com", "example.com", "uvt.nl", "uvt.nl", 
NA, NA), tag = c("data", "research", "research", "phd", "data", 
                 "classics")), row.names = c(NA, -6L), class = "data.frame"))


## test eval environment
test_function_length_plus <- function(x) length(x) + 1

expect_equal(
    system.file("testdata", "papis", package = "papisr") |>
             collect_papis_records() |>
             tabulate_papis_records(year = info$year
                                  , url = info$url
                                  , tag = test_function_length_plus(info$tags))
, structure(list(year = c(2022L, 1985L, 2222L, 2000L), url = c("example.com", 
"uvt.nl", NA, NA), tag = c(3, 3, 2, 2)), row.names = c(NA, -4L
), class = "data.frame"))




validate_with_json_schema

validate_with_json_schema <- function(yml_file = "eee-ppat/info.yml"
                   , json_shema = "test-schema.json"
                     ## , yq_path = "."
                   , yq_path = '.data_description') {
    if(all(dependencies <- Sys.which(c('yq', 'ajv')) != "")) {
        tmp_json_file_name <-
            "tmp.json"
        ## tempfile("papis-info-yml-data", fileext = c(".json"))
        file.create(tmp_json_file_name)
        tmp_json_file <- file(tmp_json_file_name)
        ## do things here
        yq_cmd <- paste0(
            "yq --output-format=json '", yq_path, "' ", shQuote(yml_file)
        )
        system(yq_cmd, intern = TRUE) |>
            writeLines(tmp_json_file)
        ajv_cmd <- paste0(
            "ajv test",
            " -s ", shQuote(json_shema)
          , " -d ", shQuote(tmp_json_file_name)
          , " --all-errors"
          , " --valid"
          , " --messages=false"
        )
        ajv_out <- 
            system(ajv_cmd
                 , intern = TRUE
                 , ignore.stderr = TRUE, invisible = TRUE)
        close(tmp_json_file)
        ## file.remove(tmp_json_file_name)
    } else {
        stop("The following dependencies are not available on your system: "
           , paste(names(dependencies[!dependencies]), collapse = ", "))
    }
    return(ajv_out)
}