From f062c15e1923c60dc67ab655aa97b591153932b8 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 3 May 2023 08:53:04 -0400 Subject: [PATCH 01/40] Typo in readme --- README.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.Rmd b/README.Rmd index ceabc54..8717786 100644 --- a/README.Rmd +++ b/README.Rmd @@ -50,7 +50,7 @@ For features that require a Dataverse account for the specific server installati DATAVERSE_KEY="examplekey12345" ``` -in your .Renviron file, where `examplekey12345` should be replace with your own key. The environment file can be opened by `usethis::edit_r_environ()`. +in your .Renviron file, where `examplekey12345` should be replaced with your own key. The environment file can be opened by `usethis::edit_r_environ()`. #### Server From 31f86a6976331e084e4774dd43ca8ba19bedafb9 Mon Sep 17 00:00:00 2001 From: JBGruber Date: Wed, 24 May 2023 21:19:00 +0200 Subject: [PATCH 02/40] add option to only return url in get_file_by_id --- DESCRIPTION | 2 +- R/get_file_by_id.R | 6 +++++- man/files.Rd | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b8cc4ef..9bf3a7d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -59,6 +59,6 @@ URL: https://iqss.github.io/dataverse-client-r/, https://dataverse.org/, https:/ BugReports: https://github.com/iqss/dataverse-client-r/issues VignetteBuilder: knitr Encoding: UTF-8 -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Roxygen: list(markdown = TRUE) Config/testthat/edition: 3 diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index d6710e1..a5fdf76 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -8,6 +8,7 @@ #' @param progress Whether to show a progress bar of the download. #' If not specified, will be set to `TRUE` for a file larger than 100MB. To fix #' a value, set `FALSE` or `TRUE`. +#' @param return_url Instead of downloading the file, just return the download link. 
#' #' @export get_file_by_id <- function( @@ -17,6 +18,7 @@ get_file_by_id <- function( vars = NULL, original = TRUE, progress = NULL, + return_url = FALSE, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ... @@ -92,7 +94,9 @@ get_file_by_id <- function( # If not bundle, request single file in non-bundle format ---- u <- paste0(api_url(server), u_part, fileid) - + if (return_url) { + return(u) + } if (isFALSE(progress)) r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query, ...) diff --git a/man/files.Rd b/man/files.Rd index 67e017f..ee78bab 100644 --- a/man/files.Rd +++ b/man/files.Rd @@ -36,6 +36,7 @@ get_file_by_id( vars = NULL, original = TRUE, progress = NULL, + return_url = FALSE, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ... @@ -106,6 +107,8 @@ nlsw88.tab, use the ingested version.)} If not specified, will be set to \code{TRUE} for a file larger than 100MB. To fix a value, set \code{FALSE} or \code{TRUE}.} +\item{return_url}{Instead of downloading the file, just return the download link.} + \item{filedoi}{A DOI for a single file (not the entire dataset), of the form \code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}. Can be a vector for multiple files.} From f33e578217547f5f465bdb5f50d2b347df4fa18a Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Fri, 8 Sep 2023 10:09:24 -0400 Subject: [PATCH 03/40] Change rdata rec to load from Global --- R/get_dataframe.R | 14 ++++++-------- man/get_dataframe.Rd | 14 ++++++-------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/R/get_dataframe.R b/R/get_dataframe.R index c8c9366..50034bc 100644 --- a/R/get_dataframe.R +++ b/R/get_dataframe.R @@ -84,15 +84,13 @@ #' #' # 3. RData files are read in by `base::load()` but cannot be assigned to an #' # object name. The following shows two possible ways to read in such files. 
+#' # First, the RData object can be loaded to the environment without object assignment. #' -#' # First, without relying on `get_dataframe_*`, write as a binary file: -#' as_binary <- get_file_by_doi( -#' filedoi = "doi:10.70122/FK2/PPIAXE/5VPXKE", -#' server = "demo.dataverse.org") -#' -#' temp <- tempdir() -#' writeBin(as_binary, path(temp, "county.RData")) -#' load(path(temp, "county.RData")) +#' get_dataframe_by_doi( +#' filedoi = "10.70122/FK2/PPIAXE/X2FC5V", +#' server = "demo.dataverse.org", +#' original = TRUE, +#' .f = function(x) load(x, envir = .GlobalEnv)) #' #' # If you are certain each RData contains only one object, one could define a #' # custom function used in https://stackoverflow.com/a/34926943 diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd index 3001f3d..3219bea 100644 --- a/man/get_dataframe.Rd +++ b/man/get_dataframe.Rd @@ -140,15 +140,13 @@ if (requireNamespace("haven", quietly = TRUE)) { # 3. RData files are read in by `base::load()` but cannot be assigned to an # object name. The following shows two possible ways to read in such files. +# First, the RData object can be loaded to the environment without object assignment. 
-# First, without relying on `get_dataframe_*`, write as a binary file: -as_binary <- get_file_by_doi( - filedoi = "doi:10.70122/FK2/PPIAXE/5VPXKE", - server = "demo.dataverse.org") - -temp <- tempdir() -writeBin(as_binary, path(temp, "county.RData")) -load(path(temp, "county.RData")) +get_dataframe_by_doi( + filedoi = "10.70122/FK2/PPIAXE/X2FC5V", + server = "demo.dataverse.org", + original = TRUE, + .f = function(x) load(x, envir = .GlobalEnv)) # If you are certain each RData contains only one object, one could define a # custom function used in https://stackoverflow.com/a/34926943 From f0b7ff0d8a44c29dc251fe7b10f90029acd841d7 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Fri, 8 Sep 2023 10:09:51 -0400 Subject: [PATCH 04/40] Document changes to #107 --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ .../tests-get_dataframe-dataframe-basketball.R | 15 ++++----------- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b8cc4ef..9bf3a7d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -59,6 +59,6 @@ URL: https://iqss.github.io/dataverse-client-r/, https://dataverse.org/, https:/ BugReports: https://github.com/iqss/dataverse-client-r/issues VignetteBuilder: knitr Encoding: UTF-8 -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Roxygen: list(markdown = TRUE) Config/testthat/edition: 3 diff --git a/NEWS.md b/NEWS.md index 2a31a92..ee0b605 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # dataverse +# 0.3.13.9999 + +* Improve recommendation for rdata loading (#107) + # CHANGES in dataverse 0.3.12 and 0.3.13 * Update expired token (#123) diff --git a/tests/testthat/tests-get_dataframe-dataframe-basketball.R b/tests/testthat/tests-get_dataframe-dataframe-basketball.R index 52e6c86..ab1c4ce 100644 --- a/tests/testthat/tests-get_dataframe-dataframe-basketball.R +++ b/tests/testthat/tests-get_dataframe-dataframe-basketball.R @@ -48,18 +48,11 @@ test_that("load-rdata", { # testthat::skip_if_offline("demo.dataverse.org") testthat::skip_on_cran() - 
# https://stackoverflow.com/a/34926943 - f_load_rda <- function(file) { - tmp <- new.env() - load(file = file, envir = tmp) - tmp[[ls(tmp)[1]]] - } - - from_rda <- get_dataframe_by_id( + get_dataframe_by_id( file = 1939003, server = "demo.dataverse.org", - .f = f_load_rda, - original = TRUE) + original = TRUE, + .f = function(x) load(x, envir = .GlobalEnv)) - expect_s3_class(from_rda, "tbl") + expect_s3_class(nlsw88, "tbl") }) From f476079f32a8c14bd03d7b3ebd5bf7efd75ec6b0 Mon Sep 17 00:00:00 2001 From: JBGruber Date: Thu, 14 Sep 2023 21:33:20 +0200 Subject: [PATCH 05/40] implements return_url for other get_file_by_* functions --- DESCRIPTION | 9 +++++++-- NEWS.md | 4 ++++ R/get_file.R | 5 +++++ R/get_file_by_id.R | 2 ++ man/files.Rd | 7 +++++-- man/get_dataframe.Rd | 1 + 6 files changed, 24 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9bf3a7d..154609f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: dataverse -Version: 0.3.13 +Version: 0.3.13.9000 Title: Client for Dataverse 4+ Repositories Authors@R: c(person(given = "Shiro", @@ -32,7 +32,12 @@ Authors@R: role = "ctb"), person(given = "Edward", family = "Jee", - role = "ctb")) + role = "ctb"), + person(given = "Johannes", + family = "Gruber", + role = c("aut"), + email = "JohannesB.Gruber@gmail.com", + comment = c(ORCID = "0000-0001-9177-1772"))) Imports: checkmate, httr, diff --git a/NEWS.md b/NEWS.md index 2a31a92..77f00dc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # dataverse +# CHANGES in dataverse 0.3.14 + +* `get_file_by_*()` can now return the download URL to be used in external functions or programs (useful for large files) + # CHANGES in dataverse 0.3.12 and 0.3.13 * Update expired token (#123) diff --git a/R/get_file.R b/R/get_file.R index 37f9c68..5dc5edc 100644 --- a/R/get_file.R +++ b/R/get_file.R @@ -85,6 +85,7 @@ get_file <- function( dataset = NULL, format = c("original", "bundle"), vars = NULL, + return_url = FALSE, key = 
Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), original = TRUE, @@ -124,6 +125,7 @@ get_file <- function( key = key, server = server, original = original, + return_url = return_url, ... ) } @@ -141,6 +143,7 @@ get_file <- function( #' @param filename Filename of the dataset, with file extension as shown in Dataverse #' (for example, if nlsw88.dta was the original but is displayed as the ingested #' nlsw88.tab, use the ingested version.) +#' @param return_url Instead of downloading the file, just return the download link. #' #' @export get_file_by_name <- function( @@ -148,6 +151,7 @@ get_file_by_name <- function( dataset, format = c("original", "bundle"), vars = NULL, + return_url = FALSE, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), original = TRUE, @@ -171,6 +175,7 @@ get_file_by_name <- function( key = key, server = server, original = original, + return_url = return_url, ... ) } diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index a5fdf76..2fb2a6f 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -121,6 +121,7 @@ get_file_by_doi <- function( format = c("original", "bundle"), vars = NULL, original = TRUE, + return_url = FALSE, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ... @@ -133,6 +134,7 @@ get_file_by_doi <- function( key = key, server = server, original = original, + return_url = return_url, ... 
) } diff --git a/man/files.Rd b/man/files.Rd index ee78bab..71bfa51 100644 --- a/man/files.Rd +++ b/man/files.Rd @@ -12,6 +12,7 @@ get_file( dataset = NULL, format = c("original", "bundle"), vars = NULL, + return_url = FALSE, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), original = TRUE, @@ -23,6 +24,7 @@ get_file_by_name( dataset, format = c("original", "bundle"), vars = NULL, + return_url = FALSE, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), original = TRUE, @@ -48,6 +50,7 @@ get_file_by_doi( format = c("original", "bundle"), vars = NULL, original = TRUE, + return_url = FALSE, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ... @@ -75,6 +78,8 @@ See \url{https://guides.dataverse.org/en/latest/api/dataaccess.html} for details \item{vars}{A character vector specifying one or more variable names, used to extract a subset of the data.} +\item{return_url}{Instead of downloading the file, just return the download link.} + \item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. Keys can be specified atomically or globally using @@ -107,8 +112,6 @@ nlsw88.tab, use the ingested version.)} If not specified, will be set to \code{TRUE} for a file larger than 100MB. To fix a value, set \code{FALSE} or \code{TRUE}.} -\item{return_url}{Instead of downloading the file, just return the download link.} - \item{filedoi}{A DOI for a single file (not the entire dataset), of the form \code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}. Can be a vector for multiple files.} diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd index 3001f3d..641d2a2 100644 --- a/man/get_dataframe.Rd +++ b/man/get_dataframe.Rd @@ -64,6 +64,7 @@ be set as a default via an environment variable. 
To set a default, run \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")} or add \verb{DATAVERSE_SERVER = "dataverse.harvard.edu} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} + \item{\code{return_url}}{Instead of downloading the file, just return the download link.} }} \item{fileid}{A numeric ID internally used for \code{get_file_by_id}. Can be a vector for multiple files.} From c2d0477de64db35dadc37b31f2ff2957907966e9 Mon Sep 17 00:00:00 2001 From: JBGruber Date: Thu, 14 Sep 2023 21:53:14 +0200 Subject: [PATCH 06/40] adds test for return_url --- tests/testthat/tests-get_file.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/tests-get_file.R b/tests/testthat/tests-get_file.R index 8bad37e..27aba5d 100644 --- a/tests/testthat/tests-get_file.R +++ b/tests/testthat/tests-get_file.R @@ -54,3 +54,11 @@ test_that("More informative error message when file does not exist", { # wrong server expect_error(get_file(2972336, server = "demo.dataverse.org"), regexp = "API") }) + +# `return_url = TRUE` returns the download URL(s) instead of the file contents +test_that("Return just URL", { + testthat::skip_on_cran() + expect_equal(get_file(c(1734005, 1734006), format = "original", server = "demo.dataverse.org", return_url = TRUE), + list("https://demo.dataverse.org/api/access/datafile/1734005", + "https://demo.dataverse.org/api/access/datafile/1734006")) +}) From 4c3e165c147a33bad762afadea847367dec83c77 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Tue, 30 Apr 2024 10:29:11 -0400 Subject: [PATCH 07/40] Create test_build.yml for python environment from https://github.com/gdcc/pyDataverse --- .github/workflows/test_build.yml | 40 ++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 .github/workflows/test_build.yml diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml new file mode 100644 index 0000000..bdbd72f --- /dev/null +++ 
b/.github/workflows/test_build.yml @@ -0,0 +1,40 @@ +name: Unit tests +on: [push] + +jobs: + custom_test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ + "3.8", + "3.9", + "3.10", + "3.11" + ] + name: Test pyDataverse + env: + PORT: 8080 + steps: + - name: "Checkout" + uses: "actions/checkout@v4" + - name: Run Dataverse Action + id: dataverse + uses: gdcc/dataverse-action@main + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install Python Dependencies + run: | + python3 -m pip install --upgrade pip + python3 -m pip install -r ./requirements/tests.txt + pip install -e . + - name: Run tests + env: + API_TOKEN_SUPERUSER: ${{ steps.dataverse.outputs.api_token }} + API_TOKEN: ${{ steps.dataverse.outputs.api_token }} + BASE_URL: ${{ steps.dataverse.outputs.base_url }} + DV_VERSION: ${{ steps.dataverse.outputs.dv_version }} + run: | + python3 -m pytest From a0b41b905d187112b919df211b03122840a0022a Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 9 May 2024 19:39:19 -0400 Subject: [PATCH 08/40] Undo add dataverse container --- .github/workflows/test_build.yml | 40 -------------------------------- 1 file changed, 40 deletions(-) delete mode 100644 .github/workflows/test_build.yml diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml deleted file mode 100644 index bdbd72f..0000000 --- a/.github/workflows/test_build.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Unit tests -on: [push] - -jobs: - custom_test: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [ - "3.8", - "3.9", - "3.10", - "3.11" - ] - name: Test pyDataverse - env: - PORT: 8080 - steps: - - name: "Checkout" - uses: "actions/checkout@v4" - - name: Run Dataverse Action - id: dataverse - uses: gdcc/dataverse-action@main - - name: Setup Python - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install Python Dependencies - run: | - 
python3 -m pip install --upgrade pip - python3 -m pip install -r ./requirements/tests.txt - pip install -e . - - name: Run tests - env: - API_TOKEN_SUPERUSER: ${{ steps.dataverse.outputs.api_token }} - API_TOKEN: ${{ steps.dataverse.outputs.api_token }} - BASE_URL: ${{ steps.dataverse.outputs.base_url }} - DV_VERSION: ${{ steps.dataverse.outputs.dv_version }} - run: | - python3 -m pytest From 1bb3326416ce4b7817e54f20e22bfbf4d8111df9 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 9 May 2024 19:40:14 -0400 Subject: [PATCH 09/40] Try putting R code in tests and referencing that in yaml --- .Rbuildignore | 1 + .github/workflows/R-CMD-check-dev.yaml | 5 ++ tests/B-search_ghaction.R | 22 ++++++++ tests/C-download_ghaction.R | 76 ++++++++++++++++++++++++++ vignettes/B-search.Rmd | 72 +++++++++++++++++++++++- vignettes/C-download.Rmd | 64 ++++++++++++++++++---- 6 files changed, 225 insertions(+), 15 deletions(-) create mode 100644 tests/B-search_ghaction.R create mode 100644 tests/C-download_ghaction.R diff --git a/.Rbuildignore b/.Rbuildignore index 273e558..e9e5a8b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,6 +6,7 @@ man-roxygen/* ^README\.Rmd$ ^README\.html$ ^CONTRIBUTING\.md$ +tests/.*_ghaction.R ^vignettes/figure$ ^vignettes/figure/.+$ \.Rmd2$ diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml index c4692a5..c939711 100644 --- a/.github/workflows/R-CMD-check-dev.yaml +++ b/.github/workflows/R-CMD-check-dev.yaml @@ -47,6 +47,11 @@ jobs: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") shell: Rscript {0} + - name: Test live dataverse (vignette) + run: | + Rscript tests/B-search_ghaction.R + Rscript tests/C-download_ghaction.R + - name: Test coverage run: covr::codecov() shell: Rscript {0} diff --git a/tests/B-search_ghaction.R b/tests/B-search_ghaction.R new file mode 100644 index 0000000..170dd98 --- /dev/null +++ b/tests/B-search_ghaction.R @@ 
-0,0 +1,22 @@ +## ----knitr_options, echo=FALSE, results="hide"---------------------------------------------- +options(width = 120) +knitr::opts_chunk$set(results = "hold") + + +## ------------------------------------------------------------------------------------------- +library("dataverse") +Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") +dataverse_search("Gary King")[c("name")] + + +## ------------------------------------------------------------------------------------------- +dataverse_search("Gary King", start = 6, per_page = 20)[c("name")] + + +## ------------------------------------------------------------------------------------------- +ei <- dataverse_search(author = "Gary King", title = "Ecological Inference", type = "dataset", per_page = 20) +# fields returned +names(ei) +# names of datasets +ei$name + diff --git a/tests/C-download_ghaction.R b/tests/C-download_ghaction.R new file mode 100644 index 0000000..592f040 --- /dev/null +++ b/tests/C-download_ghaction.R @@ -0,0 +1,76 @@ +## ----knitr_options, echo=FALSE, results="hide"---------------------------------------------- +options(width = 120) +knitr::opts_chunk$set(results = "hold") + + +## ------------------------------------------------------------------------------------------- +Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") + + +## ------------------------------------------------------------------------------------------- +library("dataverse") +library("tibble") # to see dataframes in tidyverse-form + + +## ----echo=FALSE, message=FALSE,include=FALSE------------------------------------------------ +energy <- get_dataframe_by_name( + filename = "comprehensiveJapanEnergy.tab", + dataset = "10.7910/DVN/ARKOTI", + server = "dataverse.harvard.edu") + + +## ----eval=FALSE----------------------------------------------------------------------------- +## energy <- get_dataframe_by_name( +## filename = "comprehensiveJapanEnergy.tab", +## dataset = "10.7910/DVN/ARKOTI", +## server = 
"dataverse.harvard.edu") + + +## ------------------------------------------------------------------------------------------- +head(energy) + + +## ------------------------------------------------------------------------------------------- +library(readr) +energy <- get_dataframe_by_name( + filename = "comprehensiveJapanEnergy.tab", + dataset = "10.7910/DVN/ARKOTI", + server = "dataverse.harvard.edu", + .f = function(x) read.delim(x, sep = "\t")) + +head(energy) + + +## ----message=FALSE-------------------------------------------------------------------------- +argentina_tab <- get_dataframe_by_name( + filename = "alpl2013.tab", + dataset = "10.7910/DVN/ARKOTI", + server = "dataverse.harvard.edu") + + +## ------------------------------------------------------------------------------------------- +str(argentina_tab$polling_place) + + +## ------------------------------------------------------------------------------------------- +argentina_dta <- get_dataframe_by_name( + filename = "alpl2013.tab", + dataset = "10.7910/DVN/ARKOTI", + server = "dataverse.harvard.edu", + original = TRUE, + .f = haven::read_dta) + + +## ------------------------------------------------------------------------------------------- +str(argentina_dta$polling_place) + + +## ------------------------------------------------------------------------------------------- +str(dataset_metadata("10.7910/DVN/ARKOTI", server = "dataverse.harvard.edu"), + max.level = 2) + + +## ----eval = FALSE--------------------------------------------------------------------------- +## code3 <- get_file("chapter03.R", "doi:10.7910/DVN/ARKOTI", server = "dataverse.harvard.edu") +## writeBin(code3, "chapter03.R") + diff --git a/vignettes/B-search.Rmd b/vignettes/B-search.Rmd index cf363c6..458d927 100644 --- a/vignettes/B-search.Rmd +++ b/vignettes/B-search.Rmd @@ -17,21 +17,52 @@ knitr::opts_chunk$set(results = "hold") Searching for data within Dataverse is quite easy using the `dataverse_search()` function. 
The simplest searches simply consist of a query string: -```{r} +```{r, eval=FALSE} library("dataverse") Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") dataverse_search("Gary King")[c("name")] ``` +```{r} +## name +## 1 004_informal_food_retail_Nigeria_2018.tab +## 2 00592Belle-Stress-PaperData-Subject_King_ChildIs.PDF +## 3 00592Belle-Stress-PaperData-Subject_King_ChildO.PDF +## 4 00592Belle-Stress-PaperData-Subject_King_Coping.PDF +## 5 00592Belle-Stress-PaperData-Subject_King_Discrimination.PDF +## 6 00592Belle-Stress-PaperData-Subject_King_LifeCs.PDF +## 7 00592Belle-Stress-PaperData-Subject_King_LifeE.PDF +## 8 00592Belle-Stress-PaperData-Subject_KingAndMeunier_Parenting.PDF +## 9 00698McArthur-King-BoxCoverSheets.pdf +## 10 00698McArthur-King-MemoOfAgreement.pdf +``` + + The results are paginated, so users can rely upon the `per_page` and `start` argument to requested subsequent pages of results. We'll start at 6 and to show that we retrieve the last five results from the previous query plus 15 more (due to `per_page = 20`): -```{r} +```{r, eval=FALSE} dataverse_search("Gary King", start = 6, per_page = 20)[c("name")] ``` +```{r} +# 10 of 3676 results retrieved +## name +## 1 004_informal_food_retail_Nigeria_2018.tab +## 2 00698McArthur-King-BoxCoverSheets.pdf +## 3 00698McArthur-King-MemoOfAgreement.pdf +## 4 00698McArthur-King-StudyDescription.pdf +## 5 01 ReadMe Unlocking history through automated virtual unfolding of sealed documents imaged by X-ray microtomography +## 6 01_ReadMe_The_Spiral_Locked_Letters_of_Elizabeth_I_and_Mary_Queen_of_Scots +## 7 03 Brienne Collection letterlocking data: Images folder 02/16, DB-0874_2–DB-0903 +## 8 03 Brienne Collection letterlocking data: Images folder 04/16, DB-0988–DB-1109_03 +## 9 03 Brienne Collection letterlocking data: Images folder 06/16, DB-1241_02–DB-1339_06 +## 10 03 Brienne Collection letterlocking data: Images folder 08/16, DB-1455_02–DB-1564_01 +``` + + More complicated searches can specify 
metadata fields like `title` and restrict results to a specific `type` of Dataverse object (a "dataverse", "dataset", or "file"): -```{r} +```{r, eval=FALSE} ei <- dataverse_search(author = "Gary King", title = "Ecological Inference", type = "dataset", per_page = 20) # fields returned names(ei) @@ -39,4 +70,39 @@ names(ei) ei$name ``` +```{r} +## [1] "name" "type" "url" "global_id" +## [5] "description" "published_at" "publisher" "citationHtml" +## [9] "identifier_of_dataverse" "name_of_dataverse" "citation" "storageIdentifier" +## [13] "keywords" "subjects" "fileCount" "versionId" +## [17] "versionState" "majorVersion" "minorVersion" "createdAt" +## [21] "updatedAt" "contacts" "authors" "publications" +## [1] "01 ReadMe Unlocking history through automated virtual unfolding of sealed documents imaged by X-ray microtomography" +## [2] "01_ReadMe_The_Spiral_Locked_Letters_of_Elizabeth_I_and_Mary_Queen_of_Scots" +## [3] "03 Brienne Collection letterlocking data: Images folder 02/16, DB-0874_2–DB-0903" +## [4] "03 Brienne Collection letterlocking data: Images folder 04/16, DB-0988–DB-1109_03" +## [5] "03 Brienne Collection letterlocking data: Images folder 06/16, DB-1241_02–DB-1339_06" +## [6] "03 Brienne Collection letterlocking data: Images folder 08/16, DB-1455_02–DB-1564_01" +## [7] "03 Brienne Collection letterlocking data: Images folder 12/16, DB-1868–DB-1963_03" +## [8] "03 Brienne Collection letterlocking data: Images folder 14/16, DB-2064_01–2155_03" +## [9] "03 Spiral-lock figures" +## [10] "07 Letterlocking Categories and Formats Chart" +## [11] "10 Foldable: Launch Little Book of Locks (UH6089), with Categories and Formats Chart. Letterlocking Instructional Resources" +## [12] "10 Million International Dyadic Events" +## [13] "1479 data points of covid19 policy response times" +## [14] "2016 Census of Population: ADA and DA Maps for Kings County Nova Scotia" +## [15] "3D Dust map from Green et al. (2015)" +## [16] "3D dust map from Green et al. 
(2017)" +## [17] "3D dust map from Green et al. (2019)" +## [18] "A 1D Lyman-alpha Profile Camera for Plasma Edge Neutral Studies on the DIII-D Tokamak" +## [19] "A Comparative Analysis of Brazil's Foreign Policy Drivers Towards the USA: Comment on Amorim Neto (2011)" +## [20] "A Critique of Dyadic Design" +## 16 1998 Jewish Community Study of the Coachella Valley, California +## 17 2002 State Legislative Survey +## 18 2007 White Sands Dune Field lidar topographic data +## 19 2008 White Sands Dune Field lidar topographic data +## 20 2012 STATA Data.tab + +``` + Once datasets and files are identified, it is easy to download and use them directly in R. See the ["Data Download" vignette](C-download.html) for details. diff --git a/vignettes/C-download.Rmd b/vignettes/C-download.Rmd index 754f4d1..a5ba31d 100644 --- a/vignettes/C-download.Rmd +++ b/vignettes/C-download.Rmd @@ -42,7 +42,8 @@ library("tibble") # to see dataframes in tidyverse-form First, we retrieve a plain-text file like this dataset on electricity consumption by [Wakiyama et al. (2014)](https://doi.org/10.7910/DVN/ARKOTI/GN1MRT). Taking the file name and dataset DOI from this entry, -```{r, echo=FALSE, message=FALSE,include=FALSE} + +```{r, eval=FALSE} energy <- get_dataframe_by_name( filename = "comprehensiveJapanEnergy.tab", dataset = "10.7910/DVN/ARKOTI", @@ -50,20 +51,26 @@ energy <- get_dataframe_by_name( ``` ```{r, eval=FALSE} -energy <- get_dataframe_by_name( - filename = "comprehensiveJapanEnergy.tab", - dataset = "10.7910/DVN/ARKOTI", - server = "dataverse.harvard.edu") +head(energy) ``` ```{r} -head(energy) +## # A tibble: 6 × 10 +## time date dummy temp temp2 all large house kepco tepco +## +## 1 1 8-Jan 0 5.9 34.8 95792389 35194957 26190714 13357735 26960899 +## 2 2 8-Feb 0 5.5 30.3 95156901 35322031 24224097 13315027 27189705 +## 3 3 8-Mar 0 10.7 114. 91034047 36474192 21391965 12805831 24495519 +## 4 4 8-Apr 0 14.7 216. 84087552 34949622 18494473 11494328 23540356 +## 5 5 8-May 0 18.5 342. 
82742929 35417089 17923760 11589061 22848737 +## 6 6 8-Jun 0 21.3 454. 82180013 36692291 15205229 11360771 22487441 ``` + These `get_dataframe_*` functions, introduced in v0.3.0, directly read in the data into a R environment through whatever R function supplied by `.f`. The default of the `get_dataframe_*` functions is to read in such data by `readr::read_tsv()`. The `.f` function can be modified to modify the read-in settings. For example, the following modification is a base-R equivalent to read in the ingested data. -```{r} +```{r, eval=FALSE} library(readr) energy <- get_dataframe_by_name( filename = "comprehensiveJapanEnergy.tab", @@ -74,6 +81,16 @@ energy <- get_dataframe_by_name( head(energy) ``` +```{r} +## time date dummy temp temp2 all large house kepco tepco +## 1 1 8-Jan 0 5.9 34.8 95792389 35194957 26190714 13357735 26960899 +## 2 2 8-Feb 0 5.5 30.3 95156901 35322031 24224097 13315027 27189705 +## 3 3 8-Mar 0 10.7 114.5 91034047 36474192 21391965 12805831 24495519 +## 4 4 8-Apr 0 14.7 216.1 84087552 34949622 18494473 11494328 23540356 +## 5 5 8-May 0 18.5 342.3 82742929 35417089 17923760 11589061 22848737 +## 6 6 8-Jun 0 21.3 453.7 82180013 36692291 15205229 11360771 22487441 +``` + The dataverse package can also download datasets that are _drafts_ (i.e. versions not released publicly), as long as the user of the dataset provides their appropriate DATAVERSE_KEY. Users may need to modify the metadata of a datafile, such as adding a descriptive label, for the data downloading to work properly in this case. This is because the the file identifier UNF, which the read function relies on, may only appear after metadata has been added. @@ -83,7 +100,7 @@ The dataverse package can also download datasets that are _drafts_ (i.e. version If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. 
(2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [ingested](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format. -```{r, message=FALSE} +```{r, message=FALSE,eval=FALSE} argentina_tab <- get_dataframe_by_name( filename = "alpl2013.tab", dataset = "10.7910/DVN/ARKOTI", @@ -93,13 +110,17 @@ argentina_tab <- get_dataframe_by_name( However, ingested files may not retain important dataset attributes. For example, Stata and SPSS datasets encode value labels on to numeric values. Factor variables in R dataframes encode levels, not only labels. A plain-text ingested file will discard such information. For example, the `polling_place` variable in this data is only given by numbers, although the original data labelled these numbers with informative values. -```{r} +```{r,eval=FALSE} str(argentina_tab$polling_place) ``` +```{r} +## num [1:1475] 31 31 31 31 31 31 31 31 31 31 ... +``` + When ingesting, Dataverse retains a `original` version that retains these attributes but may not be readable in some platforms. The `get_dataframe_*` functions have an argument that can be set to `original = TRUE`. In this case we know that `alpl2013.tab` was originally a Stata dta file, so we can run: -```{r} +```{r, eval=FALSE} argentina_dta <- get_dataframe_by_name( filename = "alpl2013.tab", dataset = "10.7910/DVN/ARKOTI", @@ -110,10 +131,17 @@ argentina_dta <- get_dataframe_by_name( Now we see that labels are read in through `haven`'s labelled variables class: -```{r} +```{r, eval=FALSE} str(argentina_dta$polling_place) ``` +```{r} +## dbl+lbl [1:1475] 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 3... +## @ label : chr "polling_place" +## @ format.stata: chr "%9.0g" +## @ labels : Named num [1:37] 1 2 3 4 5 6 7 8 9 10 ... +## ..- attr(*, "names")= chr [1:37] "E.E.T." 
"Escuela Juan Bautista Alberdi" "Escuela Juan Carlos Dávalos" "Escuela Bernardino de Rivadavia" ... +``` Users should pick `.f` and `original` based on their existing knowledge of the file. If the original file is a `.sav` SPSS file, `.f` can be `haven::read_sav`. If it is a `.Rds` file, use `readRDS` or `readr::read_rds`. In fact, because the raw data is read in as a binary, there is no limitation to the file types `get_dataframe_*` can read in, as far as the dataverse package is concerned. @@ -138,11 +166,23 @@ This shows that there are indeed 32 files, a mix of .R code files and tab- and c You can also retrieve more extensive metadata using `dataset_metadata()`: -```{r} +```{r, eval=FALSE} str(dataset_metadata("10.7910/DVN/ARKOTI", server = "dataverse.harvard.edu"), max.level = 2) ``` +```{r} +## List of 3 +## $ displayName: chr "Citation Metadata" +## $ name : chr "citation" +## $ fields :'data.frame': 7 obs. of 4 variables: +## ..$ typeName : chr [1:7] "title" "author" "datasetContact" "dsDescription" ... +## ..$ multiple : logi [1:7] FALSE TRUE TRUE TRUE TRUE FALSE ... +## ..$ typeClass: chr [1:7] "primitive" "compound" "compound" "compound" ... +## ..$ value :List of 7 +``` + + ## Retrieving Scripts and Other Files If the file you want to retrieve is not data, you may want to use the more primitive function, `get_file`, which gets the file data as a raw binary file. See the help page examples of `get_file()` that use the `base::writeBin()` function for details on how to write and read these binary files instead. 
From 9d35cbb8ad356996b46629be8b98ea0883a82893 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 9 May 2024 20:22:44 -0400 Subject: [PATCH 10/40] Remove sysreqs (r-hub/sysreqs was archived April 2024) --- .github/workflows/R-CMD-check-dev.yaml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml index c939711..6316977 100644 --- a/.github/workflows/R-CMD-check-dev.yaml +++ b/.github/workflows/R-CMD-check-dev.yaml @@ -19,7 +19,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v2 - uses: r-lib/actions/setup-r@v2 - uses: r-lib/actions/setup-r-dependencies@v2 - uses: r-lib/actions/setup-pandoc@v2 @@ -32,15 +32,6 @@ jobs: key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - name: Install system dependencies - if: runner.os == 'Linux' - env: - RHUB_PLATFORM: linux-x86_64-ubuntu-gcc - run: | - Rscript -e "remotes::install_github('r-hub/sysreqs')" - sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") - sudo -s eval "$sysreqs" - - name: Check run: | options(crayon.enabled = TRUE) From f668a72deb38054f1dff6a065670a0545b86b2c8 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 9 May 2024 20:44:05 -0400 Subject: [PATCH 11/40] Update token (expired 2024-03-22) --- inst/constants.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/constants.yml b/inst/constants.yml index 720b209..4266efe 100644 --- a/inst/constants.yml +++ b/inst/constants.yml @@ -1,4 +1,4 @@ server: "demo.dataverse.org" -api_token: "c0d39fb3-ac48-4db7-9781-390073440495" -api_token_expiration: "2024-03-22" +api_token: "15372813-c54f-471f-a3e8-c269ee6a610f" +api_token_expiration: "2025-05-10" api_token_name: "shirokuriwaki" From 
fe3751a727412f8823661dfb70da40ec59d8a461 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 9 May 2024 20:44:21 -0400 Subject: [PATCH 12/40] Try to cache dependencies even if run fails --- .github/workflows/R-CMD-check-dev.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml index 6316977..ea60933 100644 --- a/.github/workflows/R-CMD-check-dev.yaml +++ b/.github/workflows/R-CMD-check-dev.yaml @@ -22,6 +22,9 @@ jobs: - uses: actions/checkout@v2 - uses: r-lib/actions/setup-r@v2 - uses: r-lib/actions/setup-r-dependencies@v2 + with: + cache: "always" + - uses: r-lib/actions/setup-pandoc@v2 - name: Cache R packages From 1d193a2aeb228cced817e99164745880f326e60b Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 9 May 2024 21:40:42 -0400 Subject: [PATCH 13/40] Equality in sets, not order, for sometimes order seems to change --- tests/testthat/tests-get_dataset.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/testthat/tests-get_dataset.R b/tests/testthat/tests-get_dataset.R index cc9b9cc..190d5c8 100644 --- a/tests/testthat/tests-get_dataset.R +++ b/tests/testthat/tests-get_dataset.R @@ -23,15 +23,15 @@ test_that("download tab from DOI and filename", { expect_equal(nrow(files) , 2L) expect_equal(ncol(files) , 22L) - expect_equal(files$label , c("roster-bulls-1996.tab", "vector-basketball.svg")) - expect_equal(files$restricted , c(FALSE, FALSE)) - expect_equal(files$version , c(3L, 2L)) - expect_equal(files$datasetVersionId , c(actual$id, actual$id)) - expect_equal(files$directoryLabel , c(NA, "resources")) - expect_equal(files$id , c(1734005L, 1734006L)) - expect_equal(files$persistentId , c("doi:10.70122/FK2/HXJVJU/SA3Z2V", "doi:10.70122/FK2/HXJVJU/FHV8ZB")) - expect_equal(files$pidURL , c("https://doi.org/10.70122/FK2/HXJVJU/SA3Z2V", "https://doi.org/10.70122/FK2/HXJVJU/FHV8ZB")) - expect_equal(files$filename , 
c("roster-bulls-1996.tab", "vector-basketball.svg")) + expect_setequal(files$label , c("roster-bulls-1996.tab", "vector-basketball.svg")) + expect_setequal(files$restricted , c(FALSE, FALSE)) + expect_setequal(files$version , c(3L, 2L)) + expect_setequal(files$datasetVersionId , c(actual$id, actual$id)) + expect_setequal(files$directoryLabel , c(NA, "resources")) + expect_setequal(files$id , c(1734005L, 1734006L)) + expect_setequal(files$persistentId , c("doi:10.70122/FK2/HXJVJU/SA3Z2V", "doi:10.70122/FK2/HXJVJU/FHV8ZB")) + expect_setequal(files$pidURL , c("https://doi.org/10.70122/FK2/HXJVJU/SA3Z2V", "https://doi.org/10.70122/FK2/HXJVJU/FHV8ZB")) + expect_setequal(files$filename , c("roster-bulls-1996.tab", "vector-basketball.svg")) # expect_equal(files$description , c(NA, "CC-0-from-https://publicdomainvectors.org/en/free-clipart/Basketball-vector-symbol/69448.html")) }) From 99d3567479f10760e16c6bfe5b1bd26d53c5a55d Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sat, 11 May 2024 18:34:41 -0400 Subject: [PATCH 14/40] Dataverse now adds 2 or more columns of metadata about each file --- tests/testthat/tests-dataset_metadata.R | 2 +- tests/testthat/tests-get_dataset.R | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/tests-dataset_metadata.R b/tests/testthat/tests-dataset_metadata.R index 9e90e8f..29f2945 100644 --- a/tests/testthat/tests-dataset_metadata.R +++ b/tests/testthat/tests-dataset_metadata.R @@ -22,6 +22,6 @@ test_that("check versions format", { ds_index <- which(sapply(contents, function(x) x$identifier) == "FK2/HXJVJU") actual <- dataset_versions(contents[[ds_index]]) - expect_length(actual[[1]], 15L) + expect_length(actual[[1]], 17L) expect_s3_class(actual[[2]], "dataverse_dataset_version") }) diff --git a/tests/testthat/tests-get_dataset.R b/tests/testthat/tests-get_dataset.R index 190d5c8..212b985 100644 --- a/tests/testthat/tests-get_dataset.R +++ b/tests/testthat/tests-get_dataset.R @@ -1,5 +1,5 @@ # See 
https://demo.dataverse.org/dataverse/dataverse-client-r -# https://doi.org/10.70122/FK2/HXJVJU +# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU test_that("download tab from DOI and filename", { # testthat::skip_if_offline("demo.dataverse.org") @@ -11,7 +11,7 @@ test_that("download tab from DOI and filename", { files <- actual$files expected_dv <- retrieve_info_dataverse("expected-dataverse.yml") - expect_length(actual , 15L) + expect_length(actual , 17L) expect_equal(actual$id , 182158L) expect_equal(actual$datasetId , 1734004L) expect_equal(actual$datasetPersistentId , "doi:10.70122/FK2/HXJVJU") @@ -21,7 +21,7 @@ test_that("download tab from DOI and filename", { expect_equal(actual$license$name , "CC0 1.0") expect_equal(nrow(files) , 2L) - expect_equal(ncol(files) , 22L) + expect_equal(ncol(files) , 26L) expect_setequal(files$label , c("roster-bulls-1996.tab", "vector-basketball.svg")) expect_setequal(files$restricted , c(FALSE, FALSE)) From b9c22cadb4faf9347aad858792dbd106f91dd469 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sat, 11 May 2024 18:35:59 -0400 Subject: [PATCH 15/40] Use URLS that work in comment --- tests/testthat/tests-dataset_files.R | 2 +- tests/testthat/tests-dataset_metadata.R | 2 +- tests/testthat/tests-get_dataframe-dataframe-basketball.R | 2 +- tests/testthat/tests-get_dataframe-original-basketball.R | 2 +- tests/testthat/tests-get_file.R | 2 +- tests/testthat/tests-get_file_metadata.R | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/tests-dataset_files.R b/tests/testthat/tests-dataset_files.R index 6da8dd3..aca8104 100644 --- a/tests/testthat/tests-dataset_files.R +++ b/tests/testthat/tests-dataset_files.R @@ -1,5 +1,5 @@ # See https://demo.dataverse.org/dataverse/dataverse-client-r -# https://doi.org/10.70122/FK2/HXJVJU +# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU test_that("download tab from DOI and filename", { # 
testthat::skip_if_offline("demo.dataverse.org") diff --git a/tests/testthat/tests-dataset_metadata.R b/tests/testthat/tests-dataset_metadata.R index 29f2945..2fcdfa7 100644 --- a/tests/testthat/tests-dataset_metadata.R +++ b/tests/testthat/tests-dataset_metadata.R @@ -1,5 +1,5 @@ # See https://demo.dataverse.org/dataverse/dataverse-client-r -# https://doi.org/10.70122/FK2/HXJVJU +# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU test_that("check metadata format", { # testthat::skip_if_offline("demo.dataverse.org") diff --git a/tests/testthat/tests-get_dataframe-dataframe-basketball.R b/tests/testthat/tests-get_dataframe-dataframe-basketball.R index ab1c4ce..e963b29 100644 --- a/tests/testthat/tests-get_dataframe-dataframe-basketball.R +++ b/tests/testthat/tests-get_dataframe-dataframe-basketball.R @@ -1,5 +1,5 @@ # See https://demo.dataverse.org/dataverse/dataverse-client-r -# https://doi.org/10.70122/FK2/HXJVJU +# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU test_that("roster-by-name", { # testthat::skip_if_offline("demo.dataverse.org") diff --git a/tests/testthat/tests-get_dataframe-original-basketball.R b/tests/testthat/tests-get_dataframe-original-basketball.R index df91f6a..c2d3a61 100644 --- a/tests/testthat/tests-get_dataframe-original-basketball.R +++ b/tests/testthat/tests-get_dataframe-original-basketball.R @@ -1,5 +1,5 @@ # See https://demo.dataverse.org/dataverse/dataverse-client-r -# https://doi.org/10.70122/FK2/HXJVJU +# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU # standarize_string <- function (x) { # substring(x, 1, 10) diff --git a/tests/testthat/tests-get_file.R b/tests/testthat/tests-get_file.R index 8bad37e..fe60747 100644 --- a/tests/testthat/tests-get_file.R +++ b/tests/testthat/tests-get_file.R @@ -1,5 +1,5 @@ # See https://demo.dataverse.org/dataverse/dataverse-client-r -# https://doi.org/10.70122/FK2/HXJVJU +# 
https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU test_that("download tab from DOI and filename", { # testthat::skip_if_offline("demo.dataverse.org") diff --git a/tests/testthat/tests-get_file_metadata.R b/tests/testthat/tests-get_file_metadata.R index 582990b..d129436 100644 --- a/tests/testthat/tests-get_file_metadata.R +++ b/tests/testthat/tests-get_file_metadata.R @@ -1,5 +1,5 @@ # See https://demo.dataverse.org/dataverse/dataverse-client-r -# https://doi.org/10.70122/FK2/HXJVJU +# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU test_that("get file metadata from DOI and filename", { # testthat::skip_if_offline("demo.dataverse.org") From 28c0fc72b76a26a3170b1bf8ab76b0c70d089b62 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sat, 11 May 2024 22:14:16 -0400 Subject: [PATCH 16/40] Only error on error --- .github/workflows/R-CMD-check-dev.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml index ea60933..d1ee360 100644 --- a/.github/workflows/R-CMD-check-dev.yaml +++ b/.github/workflows/R-CMD-check-dev.yaml @@ -38,7 +38,7 @@ jobs: - name: Check run: | options(crayon.enabled = TRUE) - rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") shell: Rscript {0} - name: Test live dataverse (vignette) From b07a9befa01d9c6f3e7fb0a5184713c3962885c6 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 08:16:15 -0400 Subject: [PATCH 17/40] Run Rscript within R --- .github/workflows/R-CMD-check-dev.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml index d1ee360..c0fdb60 100644 --- a/.github/workflows/R-CMD-check-dev.yaml +++ 
b/.github/workflows/R-CMD-check-dev.yaml @@ -43,8 +43,9 @@ jobs: - name: Test live dataverse (vignette) run: | - Rscript -e "tests/B-search_ghaction.R" - Rscript -e "tests/C_download_ghaction.R" + source("tests/B-search_ghaction.R") + source("tests/C-download_ghaction.R") + shell: Rscript {0} - name: Test coverage run: covr::codecov() From 0685bf6da11caf91d350184c2ac3023640855712 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 09:56:14 -0400 Subject: [PATCH 18/40] try local per https://github.com/r-lib/actions/tree/v2/setup-r-dependencies#installing-the-local-package --- .github/workflows/R-CMD-check-dev.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml index c0fdb60..7a9deee 100644 --- a/.github/workflows/R-CMD-check-dev.yaml +++ b/.github/workflows/R-CMD-check-dev.yaml @@ -42,6 +42,8 @@ jobs: shell: Rscript {0} - name: Test live dataverse (vignette) + with: + extra-packages: any::pkgdown, local::. 
run: | source("tests/B-search_ghaction.R") source("tests/C-download_ghaction.R") From 0b6f48a4abf89e2764a2ed8f8e1758f2a1fcded4 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 10:39:46 -0400 Subject: [PATCH 19/40] Try load_all, update versions --- .github/workflows/R-CMD-check-dev.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml index 7a9deee..87af901 100644 --- a/.github/workflows/R-CMD-check-dev.yaml +++ b/.github/workflows/R-CMD-check-dev.yaml @@ -19,7 +19,8 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 + - uses: r-lib/actions/setup-r@v2 - uses: r-lib/actions/setup-r-dependencies@v2 with: @@ -29,7 +30,7 @@ jobs: - name: Cache R packages if: runner.os != 'Windows' - uses: actions/cache@v1 + uses: actions/cache@v3 with: path: ${{ env.R_LIBS_USER }} key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} @@ -42,9 +43,8 @@ jobs: shell: Rscript {0} - name: Test live dataverse (vignette) - with: - extra-packages: any::pkgdown, local::. 
run: | + devtools::load_all() source("tests/B-search_ghaction.R") source("tests/C-download_ghaction.R") shell: Rscript {0} From efa9477ecf4f3da221d6852b9dfe3561f114f557 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 11:07:15 -0400 Subject: [PATCH 20/40] Check if an error actually errors --- .github/workflows/R-CMD-check-dev.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml index 87af901..7fff0bb 100644 --- a/.github/workflows/R-CMD-check-dev.yaml +++ b/.github/workflows/R-CMD-check-dev.yaml @@ -26,8 +26,6 @@ jobs: with: cache: "always" - - uses: r-lib/actions/setup-pandoc@v2 - - name: Cache R packages if: runner.os != 'Windows' uses: actions/cache@v3 @@ -42,11 +40,12 @@ jobs: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") shell: Rscript {0} - - name: Test live dataverse (vignette) + - name: Test live dataverse in vignettes run: | devtools::load_all() source("tests/B-search_ghaction.R") source("tests/C-download_ghaction.R") + TESTERROR shell: Rscript {0} - name: Test coverage From 2c792b19c2ada1c125a0fb09f4523ad864f5111f Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 11:13:27 -0400 Subject: [PATCH 21/40] Update all github workflows for #131 --- .github/workflows/R-CMD-check-daily.yaml | 12 ++++++++++-- .github/workflows/R-CMD-check-dev.yaml | 10 +++------- .github/workflows/R-CMD-check-thorough.yaml | 13 ++++++++++--- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/.github/workflows/R-CMD-check-daily.yaml b/.github/workflows/R-CMD-check-daily.yaml index f4ca804..ab332bc 100644 --- a/.github/workflows/R-CMD-check-daily.yaml +++ b/.github/workflows/R-CMD-check-daily.yaml @@ -3,7 +3,6 @@ on: schedule: - cron: "20 3 * * *" # Run every morning at 3:20am UTC - # - cron: "7 1 * * *" # Run every morning at 1:07am UTC (~8pm central) name: R-CMD-check-daily @@ 
-13,9 +12,11 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 - uses: r-lib/actions/setup-r-dependencies@v2 + with: + cache: "always" - uses: r-lib/actions/setup-pandoc@v2 - name: Query dependencies @@ -55,6 +56,13 @@ jobs: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") shell: Rscript {0} + - name: Test live dataverse in vignettes + run: | + devtools::load_all() + source("tests/B-search_ghaction.R") + source("tests/C-download_ghaction.R") + shell: Rscript {0} + - name: Test coverage run: covr::codecov() shell: Rscript {0} diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml index 7fff0bb..f7b75ac 100644 --- a/.github/workflows/R-CMD-check-dev.yaml +++ b/.github/workflows/R-CMD-check-dev.yaml @@ -4,12 +4,9 @@ on: push: branches: - dev - # - main - # - master - # pull_request: - # branches: - # - main - # - master + pull_request: + branches: + - main name: R-CMD-check-dev @@ -45,7 +42,6 @@ jobs: devtools::load_all() source("tests/B-search_ghaction.R") source("tests/C-download_ghaction.R") - TESTERROR shell: Rscript {0} - name: Test coverage diff --git a/.github/workflows/R-CMD-check-thorough.yaml b/.github/workflows/R-CMD-check-thorough.yaml index 1606e59..ff2cf3e 100644 --- a/.github/workflows/R-CMD-check-thorough.yaml +++ b/.github/workflows/R-CMD-check-thorough.yaml @@ -8,13 +8,11 @@ on: push: branches: - main - - master # - dev pull_request: branches: - main - - master name: R-CMD-check-thorough @@ -43,7 +41,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 id: install-r @@ -52,6 +50,8 @@ jobs: http-user-agent: ${{ matrix.config.http-user-agent }} - uses: r-lib/actions/setup-r-dependencies@v2 + with: + cache: "always" - uses: r-lib/actions/setup-pandoc@v2 - name: 
Restore R package cache @@ -91,6 +91,13 @@ jobs: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") shell: Rscript {0} + - name: Test live dataverse in vignettes + run: | + devtools::load_all() + source("tests/B-search_ghaction.R") + source("tests/C-download_ghaction.R") + shell: Rscript {0} + - name: Show testthat output if: always() run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true From 45940d2a75ef5ca1583bee5a1fed4786a182d7c7 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 11:49:14 -0400 Subject: [PATCH 22/40] Indentation --- R/get_file.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/get_file.R b/R/get_file.R index 5dc5edc..63c3dfc 100644 --- a/R/get_file.R +++ b/R/get_file.R @@ -85,7 +85,7 @@ get_file <- function( dataset = NULL, format = c("original", "bundle"), vars = NULL, - return_url = FALSE, + return_url = FALSE, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), original = TRUE, @@ -151,7 +151,7 @@ get_file_by_name <- function( dataset, format = c("original", "bundle"), vars = NULL, - return_url = FALSE, + return_url = FALSE, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), original = TRUE, From 6bf639bfd4f14e6226029f4a700494509555a04b Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 12:13:32 -0400 Subject: [PATCH 23/40] Allow it to get query (original or not) --- R/get_file_by_id.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index 2fb2a6f..d705d9e 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -95,7 +95,7 @@ get_file_by_id <- function( # If not bundle, request single file in non-bundle format ---- u <- paste0(api_url(server), u_part, fileid) if (return_url) { - return(u) + return(httr::modify_url(u, query = query)) } if (isFALSE(progress)) r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = 
key), query = query, ...) From 43d000b0b662b55498636dd5e8cf8588e1be69ac Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 12:14:34 -0400 Subject: [PATCH 24/40] Make it easier for users to get the URL by creating a convenience function get_url_* --- NAMESPACE | 4 ++ R/get_url.R | 123 +++++++++++++++++++++++++++++++++++++++++++++++++ man/URLs.Rd | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 257 insertions(+) create mode 100644 R/get_url.R create mode 100644 man/URLs.Rd diff --git a/NAMESPACE b/NAMESPACE index a007dab..01c64af 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,10 @@ export(get_file_by_doi) export(get_file_by_id) export(get_file_by_name) export(get_file_metadata) +export(get_url) +export(get_url_by_doi) +export(get_url_by_id) +export(get_url_by_name) export(get_user_key) export(initiate_sword_dataset) export(list_datasets) diff --git a/R/get_url.R b/R/get_url.R new file mode 100644 index 0000000..913ce21 --- /dev/null +++ b/R/get_url.R @@ -0,0 +1,123 @@ +#' @title Get Dataverse file download URL +#' +#' @description Get URL. `get_url_*` functions return a download URL as a string +#' that can be then used in outside functions such as `curl::curl_download()`. 
+#' +#' @inheritParams get_file +#' @name URLs +#' @export +#' @examples \dontrun{ +#' # get URLs +#' get_url_by_name( +#' filename = "nlsw88.tab", +#' dataset = "10.70122/FK2/PPIAXE", +#' server = "demo.dataverse.org" +#' ) +#' # https://demo.dataverse.org/api/access/datafile/1734017?format=original +#' +#' For ingested, tab-delimited files +#' get_url_by_name( +#' filename = "nlsw88.tab", +#' dataset = "10.70122/FK2/PPIAXE", +#' original = FALSE, +#' server = "demo.dataverse.org" +#' ) +#' # https://demo.dataverse.org/api/access/datafile/1734017 +#' +#' # To download to local directory +#' curl::curl_download( +#' "https://demo.dataverse.org/api/access/datafile/1734017?format=original", +#' destfile = "nlsw88.dta") +#' } +get_url <- function( + file, + dataset = NULL, + format = c("original", "bundle"), + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, + ...) { + + get_file( + fileid = prepend_doi(filedoi), + dataset = dataset, + format = format, + key = key, + server = server, + return_url = TRUE, + original = original, + ... + ) +} + +#' @rdname URLs +#' @export +get_url_by_name <- function( + filename, + dataset, + format = c("original", "bundle"), + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, + ... +) { + format <- match.arg(format) + + get_file_by_name( + filename, + dataset, + format = format, + key = key, + server = server, + original = original, + return_url = TRUE, + ... + ) +} + +#' @rdname URLs +#' @export +get_url_by_id <- function( + fileid, + dataset = NULL, + format = c("original", "bundle"), + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, + ...) { + + format <- match.arg(format) + get_file_by_id( + fileid, + dataset = NULL, + format = c("original", "bundle"), + key = key, + server = server, + original = original, + return_url = TRUE, + ... 
+ ) +} + +#' @rdname URLs +#' @export +get_url_by_doi <- function( + filedoi, + dataset = NULL, + format = c("original", "bundle"), + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, + ...) { + format <- match.arg(format) + get_file_by_doi( + filedoi, + dataset = NULL, + format = c("original", "bundle"), + key = key, + server = server, + original = original, + return_url = TRUE, + ... + ) +} diff --git a/man/URLs.Rd b/man/URLs.Rd new file mode 100644 index 0000000..819b547 --- /dev/null +++ b/man/URLs.Rd @@ -0,0 +1,130 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_url.R +\name{URLs} +\alias{URLs} +\alias{get_url} +\alias{get_url_by_name} +\alias{get_url_by_id} +\alias{get_url_by_doi} +\title{Get Dataverse file download URL} +\usage{ +get_url( + file, + dataset = NULL, + format = c("original", "bundle"), + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, + ... +) + +get_url_by_name( + filename, + dataset, + format = c("original", "bundle"), + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, + ... +) + +get_url_by_id( + fileid, + dataset = NULL, + format = c("original", "bundle"), + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, + ... +) + +get_url_by_doi( + filedoi, + dataset = NULL, + format = c("original", "bundle"), + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + original = TRUE, + ... +) +} +\arguments{ +\item{file}{An integer specifying a file identifier; or a vector of integers +specifying file identifiers; or, if used with the prefix \code{"doi:"}, a +character with the file-specific DOI; or, if used without the prefix, a +filename accompanied by a dataset DOI in the \code{dataset} argument, or an object of +class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}. 
+Can be a vector for multiple files.} + +\item{dataset}{A character specifying a persistent identification ID for a dataset, +for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class +\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} + +\item{format}{A character string specifying a file format for download. +by default, this is \dQuote{original} (the original file format). If \code{NULL}, +no query is added, so ingested files are returned in their ingested TSV form. +For tabular datasets, the option \dQuote{bundle} downloads the bundle +of the original and archival versions, as well as the documentation. +See \url{https://guides.dataverse.org/en/latest/api/dataaccess.html} for details.} + +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} + +\item{server}{A character string specifying a Dataverse server. +Multiple Dataverse installations exist, with \code{"dataverse.harvard.edu"} being the +most major. The server can be defined each time within a function, or it can +be set as a default via an environment variable. To set a default, run +\code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")} +or add \verb{DATAVERSE_SERVER = "dataverse.harvard.edu} in one's \code{.Renviron} +file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} + +\item{original}{A logical, defaulting to TRUE. If a ingested (.tab) version is +available, download the original version instead of the ingested? If there was +no ingested version, is set to NA. Note in \verb{get_dataframe_*}, +\code{original} is set to FALSE by default. 
Either can be changed.} + +\item{...}{Additional arguments passed to an HTTP request function, such as +\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}.} + +\item{filename}{Filename of the dataset, with file extension as shown in Dataverse +(for example, if nlsw88.dta was the original but is displayed as the ingested +nlsw88.tab, use the ingested version.)} + +\item{fileid}{A numeric ID internally used for \code{get_file_by_id}. Can be a vector for multiple files.} + +\item{filedoi}{A DOI for a single file (not the entire dataset), of the form +\code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}. +Can be a vector for multiple files.} +} +\description{ +Get URL. \verb{get_url_*} functions return a download URL as a string +that can be then used in outside functions such as \code{curl::curl_download()}. +} +\examples{ +\dontrun{ +# get URLs +get_url_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" +) +# https://demo.dataverse.org/api/access/datafile/1734017?format=original + +For ingested, tab-delimited files +get_url_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + original = FALSE, + server = "demo.dataverse.org" +) +# https://demo.dataverse.org/api/access/datafile/1734017 + +# To download to local directory +curl::curl_download( + "https://demo.dataverse.org/api/access/datafile/1734017?format=original", + destfile = "nlsw88.dta") +} +} From 490ead432e6406f95191f6ef9a83dcdeedaf524e Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 12:17:50 -0400 Subject: [PATCH 25/40] Do not allow users to do get_dataframe(..., return_url = TRUE) --- R/get_dataframe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_dataframe.R b/R/get_dataframe.R index 50034bc..bba2d9b 100644 --- a/R/get_dataframe.R +++ b/R/get_dataframe.R @@ -149,7 +149,7 @@ get_dataframe_by_id <- function( } # READ raw data - raw <- 
get_file(file = fileid, original = original, ...) + raw <- get_file(file = fileid, original = original, return_url = FALSE, ...) # save to temp and then read it in with supplied function if (!is.null(.f)) { From 1dcddd0f29dc574199c43332998710c467df4738 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 12:23:09 -0400 Subject: [PATCH 26/40] Typo comment --- R/get_url.R | 2 +- man/URLs.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/get_url.R b/R/get_url.R index 913ce21..e37b2a0 100644 --- a/R/get_url.R +++ b/R/get_url.R @@ -15,7 +15,7 @@ #' ) #' # https://demo.dataverse.org/api/access/datafile/1734017?format=original #' -#' For ingested, tab-delimited files +#' # For ingested, tab-delimited files #' get_url_by_name( #' filename = "nlsw88.tab", #' dataset = "10.70122/FK2/PPIAXE", diff --git a/man/URLs.Rd b/man/URLs.Rd index 819b547..a884ca1 100644 --- a/man/URLs.Rd +++ b/man/URLs.Rd @@ -113,7 +113,7 @@ get_url_by_name( ) # https://demo.dataverse.org/api/access/datafile/1734017?format=original -For ingested, tab-delimited files +# For ingested, tab-delimited files get_url_by_name( filename = "nlsw88.tab", dataset = "10.70122/FK2/PPIAXE", From d7b39a3b81dbbe930cf5f7fa5d93d1908a0cd93a Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 12:25:18 -0400 Subject: [PATCH 27/40] aut to ctb per https://r-pkgs.org/description.html#sec-description-authors-at-r --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 154609f..e642bc0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,7 +35,7 @@ Authors@R: role = "ctb"), person(given = "Johannes", family = "Gruber", - role = c("aut"), + role = c("ctb"), email = "JohannesB.Gruber@gmail.com", comment = c(ORCID = "0000-0001-9177-1772"))) Imports: From 94ff7873b4c1ee61928f05faf7d391ce5d903a45 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 12:34:45 -0400 Subject: [PATCH 28/40] Write tests and amend 
to get `original` --- tests/testthat/tests-get_file.R | 8 -------- tests/testthat/tests-get_url.R | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) create mode 100644 tests/testthat/tests-get_url.R diff --git a/tests/testthat/tests-get_file.R b/tests/testthat/tests-get_file.R index 2765556..fe60747 100644 --- a/tests/testthat/tests-get_file.R +++ b/tests/testthat/tests-get_file.R @@ -54,11 +54,3 @@ test_that("More informative error message when file does not exist", { # wrong server expect_error(get_file(2972336, server = "demo.dataverse.org"), regexp = "API") }) - -# Informative error message (PR #30) -test_that("Return just URL", { - testthat::skip_on_cran() - expect_equal(get_file(c(1734005, 1734006), format = "original", server = "demo.dataverse.org", return_url = TRUE), - list("https://demo.dataverse.org/api/access/datafile/1734005", - "https://demo.dataverse.org/api/access/datafile/1734006")) -}) diff --git a/tests/testthat/tests-get_url.R b/tests/testthat/tests-get_url.R new file mode 100644 index 0000000..8f06637 --- /dev/null +++ b/tests/testthat/tests-get_url.R @@ -0,0 +1,19 @@ +# Informative error message (PR #30) +test_that("Return just URL", { + testthat::skip_on_cran() + expect_equal( + get_file(c(1734005, 1734006), + server = "demo.dataverse.org", + original = TRUE, + return_url = TRUE), + list("https://demo.dataverse.org/api/access/datafile/1734005?format=original", + "https://demo.dataverse.org/api/access/datafile/1734006")) + + expect_equal( + get_url_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" + ), + expected = "httpshttps://demo.dataverse.org/api/access/datafile/1734017?format=original") +}) From 369b5b7795306cc996a43724be350d0aa195d580 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 12:35:09 -0400 Subject: [PATCH 29/40] Bug - wrong argument --- R/get_url.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/get_url.R 
b/R/get_url.R index e37b2a0..03c74f3 100644 --- a/R/get_url.R +++ b/R/get_url.R @@ -39,7 +39,7 @@ get_url <- function( ...) { get_file( - fileid = prepend_doi(filedoi), + fileid = file, dataset = dataset, format = format, key = key, @@ -109,6 +109,7 @@ get_url_by_doi <- function( server = Sys.getenv("DATAVERSE_SERVER"), original = TRUE, ...) { + format <- match.arg(format) get_file_by_doi( filedoi, From c03728f3b4ec3dc32cb815cc0e09b2b3cb5c11bc Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 12:42:09 -0400 Subject: [PATCH 30/40] Version up for CRAN submission --- DESCRIPTION | 2 +- cran-comments.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e642bc0..0a38d9b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: dataverse -Version: 0.3.13.9000 +Version: 0.3.14 Title: Client for Dataverse 4+ Repositories Authors@R: c(person(given = "Shiro", diff --git a/cran-comments.md b/cran-comments.md index f9cc69c..7b11fa8 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,7 +1,7 @@ Description ----------------------------------------------- -This version fixes an outdated token error reported on March 2023. No other changes were made. +This version removes remote resources from vignette per CRAN policy, and makes several other usability improvements. 
Shiro Kuriwaki From aef52ab660ff0df9caa5b271012ba63ff85028bb Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 12:43:19 -0400 Subject: [PATCH 31/40] Updates NEWS --- NEWS.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index edb5dfc..50b86d9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,8 +2,9 @@ # CHANGES in dataverse 0.3.14 -* `get_file_by_*()` can now return the download URL to be used in external functions or programs, useful for large files (#128) -* Improve recommendation for rdata loading (#107) +* Improve recommendation for rdata loading (#107, #127) +* `get_file_by_*()` can now return the download URL to be used in external functions or programs, useful for large files (#128, implemented in #129 @JBGruber and @kuriwaki) +* Removes remote resource from vignette and move them to ghactions (#131) # CHANGES in dataverse 0.3.12 and 0.3.13 From 47dfb2f313bd3f2ef7cc87c5baba2e9fd5b0f6bd Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 12:45:31 -0400 Subject: [PATCH 32/40] Test typo --- tests/testthat/tests-get_url.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/tests-get_url.R b/tests/testthat/tests-get_url.R index 8f06637..33a81b2 100644 --- a/tests/testthat/tests-get_url.R +++ b/tests/testthat/tests-get_url.R @@ -15,5 +15,5 @@ test_that("Return just URL", { dataset = "10.70122/FK2/PPIAXE", server = "demo.dataverse.org" ), - expected = "httpshttps://demo.dataverse.org/api/access/datafile/1734017?format=original") + expected = "https://demo.dataverse.org/api/access/datafile/1734017?format=original") }) From 9384ef371e7a3e26ce7c7023ec027bb415b08cdb Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 14:56:27 -0400 Subject: [PATCH 33/40] Update description of get_url() --- R/get_file.R | 1 - R/get_file_by_id.R | 3 ++- R/get_url.R | 13 +++++++++++-- man/URLs.Rd | 13 +++++++++++-- man/files.Rd | 3 ++- man/get_dataframe.Rd | 3 ++- 6 files 
changed, 28 insertions(+), 8 deletions(-) diff --git a/R/get_file.R b/R/get_file.R index 63c3dfc..cf2fd92 100644 --- a/R/get_file.R +++ b/R/get_file.R @@ -143,7 +143,6 @@ get_file <- function( #' @param filename Filename of the dataset, with file extension as shown in Dataverse #' (for example, if nlsw88.dta was the original but is displayed as the ingested #' nlsw88.tab, use the ingested version.) -#' @param return_url Instead of downloading the file, just return the download link. #' #' @export get_file_by_name <- function( diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index d705d9e..3c24c85 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -8,7 +8,8 @@ #' @param progress Whether to show a progress bar of the download. #' If not specified, will be set to `TRUE` for a file larger than 100MB. To fix #' a value, set `FALSE` or `TRUE`. -#' @param return_url Instead of downloading the file, just return the download link. +#' @param return_url Instead of downloading the file, return the URL for download. +#' Defaults to `FALSE`. #' #' @export get_file_by_id <- function( diff --git a/R/get_url.R b/R/get_url.R index 03c74f3..b45d8bb 100644 --- a/R/get_url.R +++ b/R/get_url.R @@ -1,10 +1,19 @@ #' @title Get Dataverse file download URL #' -#' @description Get URL. `get_url_*` functions return a download URL as a string -#' that can be then used in outside functions such as `curl::curl_download()`. +#' @description Get URL of associated file. `get_url_*` functions return a URL as +#' a string. This can then be used in other functions such as `curl::curl_download()`. +#' +#' @details +#' This function does not download the associated data. +#' In contrast, `get_dataframe()` downloads the requested file to a tempfile, and then uses R +#' to read it. And `get_file(.., return_url = FALSE)` reads the binary file into +#' R's memory with `httr::GET()`. `get_url()` simply returns the URL for download.
+#' #' #' @inheritParams get_file #' @name URLs +#' +#' @returns A string or a list of strings that are URLs. #' @export #' @examples \dontrun{ #' # get URLs diff --git a/man/URLs.Rd b/man/URLs.Rd index a884ca1..822b272 100644 --- a/man/URLs.Rd +++ b/man/URLs.Rd @@ -99,9 +99,18 @@ nlsw88.tab, use the ingested version.)} \code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}. Can be a vector for multiple files.} } +\value{ +A string or a list of strings that are URLs. +} \description{ -Get URL. \verb{get_url_*} functions return a download URL as a string -that can be then used in outside functions such as \code{curl::curl_download()}. +Get URL of associated file. \verb{get_url_*} functions return a URL as +a string. This can then be used in other functions such as \code{curl::curl_download()}. +} +\details{ +This function does not download the associated data. +In contrast, \code{get_dataframe()} downloads the requested file to a tempfile, and then uses R +to read it. And \code{get_file(.., return_url = FALSE)} reads the binary file into +R's memory with \code{httr::GET()}. \code{get_url()} simply returns the URL for download. } \examples{ \dontrun{ diff --git a/man/files.Rd b/man/files.Rd index 71bfa51..691ac3d 100644 --- a/man/files.Rd +++ b/man/files.Rd @@ -78,7 +78,8 @@ See \url{https://guides.dataverse.org/en/latest/api/dataaccess.html} for details \item{vars}{A character vector specifying one or more variable names, used to extract a subset of the data.} -\item{return_url}{Instead of downloading the file, just return the download link.} +\item{return_url}{Instead of downloading the file, return the URL for download. +Defaults to \code{FALSE}.} \item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. 
diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd index 1f5baf9..a4a2f90 100644 --- a/man/get_dataframe.Rd +++ b/man/get_dataframe.Rd @@ -64,7 +64,8 @@ be set as a default via an environment variable. To set a default, run \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")} or add \verb{DATAVERSE_SERVER = "dataverse.harvard.edu} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} - \item{\code{return_url}}{Instead of downloading the file, just return the download link.} + \item{\code{return_url}}{Instead of downloading the file, return the URL for download. +Defaults to \code{FALSE}.} }} \item{fileid}{A numeric ID internally used for \code{get_file_by_id}. Can be a vector for multiple files.} From 029052e60665933d292a7398ba729de473f33884 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 15:07:09 -0400 Subject: [PATCH 34/40] Indentation (style) --- R/add_dataset_file.R | 60 +++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/R/add_dataset_file.R b/R/add_dataset_file.R index 089b394..02fb43b 100644 --- a/R/add_dataset_file.R +++ b/R/add_dataset_file.R @@ -59,17 +59,17 @@ #' } #' @export add_dataset_file <- -function(file, - dataset, - description = NULL, - key = Sys.getenv("DATAVERSE_KEY"), - server = Sys.getenv("DATAVERSE_SERVER"), - ...) { + function(file, + dataset, + description = NULL, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + ...) { dataset <- dataset_id(dataset, key = key, server = server, ...) 
bod2 <- list(forceReplace = force) if (!is.null(description)) { - bod2$description <- description + bod2$description <- description } jsondata <- as.character(jsonlite::toJSON(bod2, auto_unbox = TRUE)) @@ -81,44 +81,48 @@ function(file, httr::stop_for_status(r, task = httr::content(r)$message) out <- jsonlite::fromJSON(httr::content(r, "text", encoding = "UTF-8")) out$data$files$dataFile$id[1L] -} + } #' @rdname add_dataset_file #' @export update_dataset_file <- -function(file, - dataset = NULL, - id, - description = NULL, - force = TRUE, - key = Sys.getenv("DATAVERSE_KEY"), - server = Sys.getenv("DATAVERSE_SERVER"), - ...) { + function(file, + dataset = NULL, + id, + description = NULL, + force = TRUE, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + ...) { dataset <- dataset_id(dataset, key = key, server = server, ...) # get file ID from 'dataset' if (!is.numeric(id)) { - if (inherits(id, "dataverse_file")) { - id <- get_fileid(id, key = key, server = server) - } else if (is.null(dataset)) { - stop("When 'id' is a character string, dataset must be specified. Or, use a global fileid instead.") - } else { - id <- get_fileid(dataset, id, key = key, server = server, ...) - } + if (inherits(id, "dataverse_file")) { + id <- get_fileid(id, key = key, server = server) + } else if (is.null(dataset)) { + stop("When 'id' is a character string, dataset must be specified. Or, use a global fileid instead.") + } else { + id <- get_fileid(dataset, id, key = key, server = server, ...) 
+ } } bod2 <- list(forceReplace = force) if (!is.null(description)) { - bod2$description <- description + bod2$description <- description } jsondata <- as.character(jsonlite::toJSON(bod2, auto_unbox = TRUE)) u <- paste0(api_url(server), "files/", id, "/replace") - r <- httr::POST(u, httr::add_headers("X-Dataverse-key" = key), ..., + r <- httr::POST(u, + httr::add_headers("X-Dataverse-key" = key), ..., body = list(file = httr::upload_file(file), jsonData = jsondata - ), + ), encode = "multipart") httr::stop_for_status(r, task = httr::content(r)$message) - structure(jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE)$data$files[[1L]], class = "dataverse_file") -} + structure(jsonlite::fromJSON( + httr::content(r, as = "text", encoding = "UTF-8"), + simplifyDataFrame = FALSE)$data$files[[1L]], class = "dataverse_file" + ) + } From 1dc5cb1888d4761ad0475a4daaf5ef542307567c Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 15:10:14 -0400 Subject: [PATCH 35/40] Update master/main in README --- README.Rmd | 8 ++++---- README.md | 15 ++++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/README.Rmd b/README.Rmd index 8717786..05e5f5b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -12,10 +12,10 @@ Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") [![CRAN Version](https://www.r-pkg.org/badges/version/dataverse)](https://cran.r-project.org/package=dataverse) ![Downloads](https://cranlogs.r-pkg.org/badges/dataverse) -[![R-CMD-check-thorough](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml/badge.svg?branch=master)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml) -[![R-CMD-check-daily](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml/badge.svg?branch=master)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml) 
+[![R-CMD-check-thorough](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml/badge.svg?branch=main)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml) +[![R-CMD-check-daily](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml/badge.svg?branch=main)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml) [![R-CMD-check-dev](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-dev.yaml/badge.svg)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-dev.yaml) -[![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=master)](https://codecov.io/github/IQSS/dataverse-client-r?branch=master) +[![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=main)](https://app.codecov.io/github/IQSS/dataverse-client-r?branch=main) [![Dataverse Project logo](https://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png)](https://dataverse.org) @@ -208,7 +208,7 @@ Functions related to user management and permissions are currently not exported Dataverse clients in other programming languages include [pyDataverse](https://pydataverse.readthedocs.io/en/latest/) for Python and the [Java client](https://github.com/IQSS/dataverse-client-java). For more information, see [the Dataverse API page](https://guides.dataverse.org/en/5.5/api/client-libraries.html#r). -Users interested in downloading metadata from archives other than Dataverse may be interested in Kurt Hornik's [OAIHarvester](https://cran.r-project.org/package=OAIHarvester) and Scott Chamberlain's [oai](https://cran.r-project.org/package=oai), which offer metadata download from any web repository that is compliant with the [Open Archives Initiative](http://www.openarchives.org/) standards. 
Additionally, [rdryad](https://cran.r-project.org/package=rdryad) uses OAIHarvester to interface with [Dryad](https://datadryad.org/stash). The [rfigshare](https://cran.r-project.org/package=rfigshare) package works in a similar spirit to **dataverse** with . +Users interested in downloading metadata from archives other than Dataverse may be interested in Kurt Hornik's [OAIHarvester](https://cran.r-project.org/package=OAIHarvester) and Scott Chamberlain's [oai](https://cran.r-project.org/package=oai), which offer metadata download from any web repository that is compliant with the [Open Archives Initiative](https://www.openarchives.org:443/) standards. Additionally, [rdryad](https://cran.r-project.org/package=rdryad) uses OAIHarvester to interface with [Dryad](https://datadryad.org/stash). The [rfigshare](https://cran.r-project.org/package=rfigshare) package works in a similar spirit to **dataverse** with . ### More Information diff --git a/README.md b/README.md index ec77188..4433f16 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,10 @@ R Client for Dataverse Repositories Version](https://www.r-pkg.org/badges/version/dataverse)](https://cran.r-project.org/package=dataverse) ![Downloads](https://cranlogs.r-pkg.org/badges/dataverse) -[![R-CMD-check-thorough](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml/badge.svg?branch=master)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml) -[![R-CMD-check-daily](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml/badge.svg?branch=master)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml) +[![R-CMD-check-thorough](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml/badge.svg?branch=main)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml) 
+[![R-CMD-check-daily](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml/badge.svg?branch=main)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml) [![R-CMD-check-dev](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-dev.yaml/badge.svg)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-dev.yaml) -[![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=master)](https://codecov.io/github/IQSS/dataverse-client-r?branch=master) +[![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=main)](https://app.codecov.io/github/IQSS/dataverse-client-r?branch=main) [![Dataverse Project logo](https://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png)](https://dataverse.org) @@ -54,7 +54,7 @@ variable called `DATAVERSE_KEY`. It can be set as a default by adding DATAVERSE_KEY="examplekey12345" ``` -in your .Renviron file, where `examplekey12345` should be replace with +in your .Renviron file, where `examplekey12345` should be replaced with your own key. The environment file can be opened by `usethis::edit_r_environ()`. @@ -285,9 +285,10 @@ Dataverse may be interested in Kurt Hornik’s [OAIHarvester](https://cran.r-project.org/package=OAIHarvester) and Scott Chamberlain’s [oai](https://cran.r-project.org/package=oai), which offer metadata download from any web repository that is compliant with -the [Open Archives Initiative](http://www.openarchives.org/) standards. -Additionally, [rdryad](https://cran.r-project.org/package=rdryad) uses -OAIHarvester to interface with [Dryad](https://datadryad.org/stash). The +the [Open Archives Initiative](https://www.openarchives.org:443/) +standards. Additionally, +[rdryad](https://cran.r-project.org/package=rdryad) uses OAIHarvester to +interface with [Dryad](https://datadryad.org/stash). 
The [rfigshare](https://cran.r-project.org/package=rfigshare) package works in a similar spirit to **dataverse** with . From 9212d58f0b2f1c5e9a66edd9ffb32a3b3a3aa2da Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 15:10:25 -0400 Subject: [PATCH 36/40] New rhub option change --- for-developers/developer-tasks.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/for-developers/developer-tasks.R b/for-developers/developer-tasks.R index 431229a..477864d 100644 --- a/for-developers/developer-tasks.R +++ b/for-developers/developer-tasks.R @@ -34,6 +34,6 @@ devtools::check( # Equivalent of R-hub remote = TRUE, incoming = TRUE ) -# devtools::check_rhub(email = "shirokuriwaki@gmail.com", env_vars = c(R_COMPILE_AND_INSTALL_PACKAGES = "always")) -# devtools::check_win_devel() # CRAN submission policies encourage the development version +devtools::check_rhub(email = "shirokuriwaki@gmail.com") +devtools::check_win_devel() # CRAN submission policies encourage the development version # devtools::release(check = FALSE) # Careful, the last question ultimately uploads it to CRAN, where you can't delete/reverse your decision. 
From 10ea7593f49696b5d166b0bb4e8b6c57b27c2338 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 16:52:15 -0400 Subject: [PATCH 37/40] Correct yaml for thorough --- .github/workflows/R-CMD-check-thorough.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check-thorough.yaml b/.github/workflows/R-CMD-check-thorough.yaml index ff2cf3e..6f780b8 100644 --- a/.github/workflows/R-CMD-check-thorough.yaml +++ b/.github/workflows/R-CMD-check-thorough.yaml @@ -50,8 +50,8 @@ jobs: http-user-agent: ${{ matrix.config.http-user-agent }} - uses: r-lib/actions/setup-r-dependencies@v2 - with: - cache: "always" + with: + cache: "always" - uses: r-lib/actions/setup-pandoc@v2 - name: Restore R package cache From 1ecd76441de78d6b7dbe44df0b45933cc193a1e1 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 16:53:40 -0400 Subject: [PATCH 38/40] Only run on PRs to main --- .github/workflows/R-CMD-check-thorough.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/R-CMD-check-thorough.yaml b/.github/workflows/R-CMD-check-thorough.yaml index 6f780b8..791b7fd 100644 --- a/.github/workflows/R-CMD-check-thorough.yaml +++ b/.github/workflows/R-CMD-check-thorough.yaml @@ -5,11 +5,6 @@ # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions on: - push: - branches: - - main - # - dev - pull_request: branches: - main From 465e0ad0b5f6c6a81344e9455c4ecee0804db0e2 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 16:54:14 -0400 Subject: [PATCH 39/40] Fix yaml issue with daily --- .github/workflows/R-CMD-check-daily.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check-daily.yaml b/.github/workflows/R-CMD-check-daily.yaml index ab332bc..d1a8fcb 100644 --- a/.github/workflows/R-CMD-check-daily.yaml +++ b/.github/workflows/R-CMD-check-daily.yaml @@ -15,8 +15,8 @@ jobs: - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 - uses: r-lib/actions/setup-r-dependencies@v2 - with: - cache: "always" + with: + cache: "always" - uses: r-lib/actions/setup-pandoc@v2 - name: Query dependencies From 720613b2eecec6c989105a57c335dfadb6c2f009 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sun, 12 May 2024 17:39:39 -0400 Subject: [PATCH 40/40] Update with good tests --- cran-comments.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cran-comments.md b/cran-comments.md index 7b11fa8..1083abd 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,7 +1,7 @@ Description ----------------------------------------------- -This version removes remote resources from vignette per CRAN policy, and makes several other usability improvements. +This version (0.3.14) removes remote resources from vignette per CRAN policy, and makes several other usability improvements. Shiro Kuriwaki @@ -9,15 +9,14 @@ Shiro Kuriwaki Test environments ----------------------------------------------- -1. [win-builder](https://win-builder.r-project.org/QhQR4q21BLc1), development version (`devtools::check_win_devel()`) -2. [R-hub](https://builder.r-hub.io/status/dataverse_0.3.13.tar.gz-a910246e058d4fdea677a3e29278dfbf). -3. 
[GitHub Actions](https://github.com/IQSS/dataverse-client-r/actions) +1. [win-builder](https://win-builder.r-project.org/FQhpeR7xF2O1), development version (`devtools::check_win_devel()`) +2. [GitHub Actions](https://github.com/IQSS/dataverse-client-r/actions/runs/9054158346) * os: macOS-latest , r: 'release' * os: windows-latest, r: 'devel' * os: windows-latest, r: 'release' * os: ubuntu-20.04 , r: 'devel' * os: ubuntu-20.04 , r: 'release' -4. Local macOS, R 4.2.1 +3. Local macOS, R 4.3.3 R CMD check results