diff --git a/.Rbuildignore b/.Rbuildignore
index 273e558..e9e5a8b 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -6,6 +6,7 @@ man-roxygen/*
^README\.Rmd$
^README\.html$
^CONTRIBUTING\.md$
+^tests/.*_ghaction\.R$
^vignettes/figure$
^vignettes/figure/.+$
\.Rmd2$
diff --git a/.github/workflows/R-CMD-check-daily.yaml b/.github/workflows/R-CMD-check-daily.yaml
index f4ca804..d1a8fcb 100644
--- a/.github/workflows/R-CMD-check-daily.yaml
+++ b/.github/workflows/R-CMD-check-daily.yaml
@@ -3,7 +3,6 @@
on:
schedule:
- cron: "20 3 * * *" # Run every morning at 3:20am UTC
- # - cron: "7 1 * * *" # Run every morning at 1:07am UTC (~8pm central)
name: R-CMD-check-daily
@@ -13,9 +12,11 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- uses: r-lib/actions/setup-r@v2
- uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ cache: "always"
- uses: r-lib/actions/setup-pandoc@v2
- name: Query dependencies
@@ -55,6 +56,13 @@ jobs:
rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
shell: Rscript {0}
+ - name: Test live dataverse in vignettes
+ run: |
+ devtools::load_all()
+ source("tests/B-search_ghaction.R")
+ source("tests/C-download_ghaction.R")
+ shell: Rscript {0}
+
- name: Test coverage
run: covr::codecov()
shell: Rscript {0}
diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml
index c4692a5..f7b75ac 100644
--- a/.github/workflows/R-CMD-check-dev.yaml
+++ b/.github/workflows/R-CMD-check-dev.yaml
@@ -4,12 +4,9 @@ on:
push:
branches:
- dev
- # - main
- # - master
- # pull_request:
- # branches:
- # - main
- # - master
+ pull_request:
+ branches:
+ - main
name: R-CMD-check-dev
@@ -19,32 +16,32 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
+
- uses: r-lib/actions/setup-r@v2
- uses: r-lib/actions/setup-r-dependencies@v2
- - uses: r-lib/actions/setup-pandoc@v2
+ with:
+ cache: "always"
- name: Cache R packages
if: runner.os != 'Windows'
- uses: actions/cache@v1
+ uses: actions/cache@v3
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
- - name: Install system dependencies
- if: runner.os == 'Linux'
- env:
- RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
- run: |
- Rscript -e "remotes::install_github('r-hub/sysreqs')"
- sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))")
- sudo -s eval "$sysreqs"
-
- name: Check
run: |
options(crayon.enabled = TRUE)
- rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
+ rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check")
+ shell: Rscript {0}
+
+ - name: Test live dataverse in vignettes
+ run: |
+ devtools::load_all()
+ source("tests/B-search_ghaction.R")
+ source("tests/C-download_ghaction.R")
shell: Rscript {0}
- name: Test coverage
diff --git a/.github/workflows/R-CMD-check-thorough.yaml b/.github/workflows/R-CMD-check-thorough.yaml
index 1606e59..791b7fd 100644
--- a/.github/workflows/R-CMD-check-thorough.yaml
+++ b/.github/workflows/R-CMD-check-thorough.yaml
@@ -5,16 +5,9 @@
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
- push:
- branches:
- - main
- - master
- # - dev
-
pull_request:
branches:
- main
- - master
name: R-CMD-check-thorough
@@ -43,7 +36,7 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- uses: r-lib/actions/setup-r@v2
id: install-r
@@ -52,6 +45,8 @@ jobs:
http-user-agent: ${{ matrix.config.http-user-agent }}
- uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ cache: "always"
- uses: r-lib/actions/setup-pandoc@v2
- name: Restore R package cache
@@ -91,6 +86,13 @@ jobs:
rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
shell: Rscript {0}
+ - name: Test live dataverse in vignettes
+ run: |
+ devtools::load_all()
+ source("tests/B-search_ghaction.R")
+ source("tests/C-download_ghaction.R")
+ shell: Rscript {0}
+
- name: Show testthat output
if: always()
run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true
diff --git a/DESCRIPTION b/DESCRIPTION
index b8cc4ef..0a38d9b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
Package: dataverse
-Version: 0.3.13
+Version: 0.3.14
Title: Client for Dataverse 4+ Repositories
Authors@R:
c(person(given = "Shiro",
@@ -32,7 +32,12 @@ Authors@R:
role = "ctb"),
person(given = "Edward",
family = "Jee",
- role = "ctb"))
+ role = "ctb"),
+ person(given = "Johannes",
+ family = "Gruber",
+ role = c("ctb"),
+ email = "JohannesB.Gruber@gmail.com",
+ comment = c(ORCID = "0000-0001-9177-1772")))
Imports:
checkmate,
httr,
@@ -59,6 +64,6 @@ URL: https://iqss.github.io/dataverse-client-r/, https://dataverse.org/, https:/
BugReports: https://github.com/iqss/dataverse-client-r/issues
VignetteBuilder: knitr
Encoding: UTF-8
-RoxygenNote: 7.2.0
+RoxygenNote: 7.2.3
Roxygen: list(markdown = TRUE)
Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index a007dab..01c64af 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -38,6 +38,10 @@ export(get_file_by_doi)
export(get_file_by_id)
export(get_file_by_name)
export(get_file_metadata)
+export(get_url)
+export(get_url_by_doi)
+export(get_url_by_id)
+export(get_url_by_name)
export(get_user_key)
export(initiate_sword_dataset)
export(list_datasets)
diff --git a/NEWS.md b/NEWS.md
index 2a31a92..50b86d9 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,11 @@
# dataverse
+# CHANGES in dataverse 0.3.14
+
+* Improve recommendation for rdata loading (#107, #127)
+* `get_file_by_*()` can now return the download URL to be used in external functions or programs, useful for large files (#128, implemented in #129 @JBGruber and @kuriwaki)
+* Remove remote resources from the vignettes and move them to GitHub Actions (#131)
+
# CHANGES in dataverse 0.3.12 and 0.3.13
* Update expired token (#123)
diff --git a/R/add_dataset_file.R b/R/add_dataset_file.R
index 089b394..02fb43b 100644
--- a/R/add_dataset_file.R
+++ b/R/add_dataset_file.R
@@ -59,17 +59,17 @@
#' }
#' @export
add_dataset_file <-
-function(file,
- dataset,
- description = NULL,
- key = Sys.getenv("DATAVERSE_KEY"),
- server = Sys.getenv("DATAVERSE_SERVER"),
- ...) {
+ function(file,
+ dataset,
+ description = NULL,
+ key = Sys.getenv("DATAVERSE_KEY"),
+ server = Sys.getenv("DATAVERSE_SERVER"),
+ ...) {
dataset <- dataset_id(dataset, key = key, server = server, ...)
bod2 <- list(forceReplace = force)
if (!is.null(description)) {
- bod2$description <- description
+ bod2$description <- description
}
jsondata <- as.character(jsonlite::toJSON(bod2, auto_unbox = TRUE))
@@ -81,44 +81,48 @@ function(file,
httr::stop_for_status(r, task = httr::content(r)$message)
out <- jsonlite::fromJSON(httr::content(r, "text", encoding = "UTF-8"))
out$data$files$dataFile$id[1L]
-}
+ }
#' @rdname add_dataset_file
#' @export
update_dataset_file <-
-function(file,
- dataset = NULL,
- id,
- description = NULL,
- force = TRUE,
- key = Sys.getenv("DATAVERSE_KEY"),
- server = Sys.getenv("DATAVERSE_SERVER"),
- ...) {
+ function(file,
+ dataset = NULL,
+ id,
+ description = NULL,
+ force = TRUE,
+ key = Sys.getenv("DATAVERSE_KEY"),
+ server = Sys.getenv("DATAVERSE_SERVER"),
+ ...) {
dataset <- dataset_id(dataset, key = key, server = server, ...)
# get file ID from 'dataset'
if (!is.numeric(id)) {
- if (inherits(id, "dataverse_file")) {
- id <- get_fileid(id, key = key, server = server)
- } else if (is.null(dataset)) {
- stop("When 'id' is a character string, dataset must be specified. Or, use a global fileid instead.")
- } else {
- id <- get_fileid(dataset, id, key = key, server = server, ...)
- }
+ if (inherits(id, "dataverse_file")) {
+ id <- get_fileid(id, key = key, server = server)
+ } else if (is.null(dataset)) {
+ stop("When 'id' is a character string, dataset must be specified. Or, use a global fileid instead.")
+ } else {
+ id <- get_fileid(dataset, id, key = key, server = server, ...)
+ }
}
bod2 <- list(forceReplace = force)
if (!is.null(description)) {
- bod2$description <- description
+ bod2$description <- description
}
jsondata <- as.character(jsonlite::toJSON(bod2, auto_unbox = TRUE))
u <- paste0(api_url(server), "files/", id, "/replace")
- r <- httr::POST(u, httr::add_headers("X-Dataverse-key" = key), ...,
+ r <- httr::POST(u,
+ httr::add_headers("X-Dataverse-key" = key), ...,
body = list(file = httr::upload_file(file),
jsonData = jsondata
- ),
+ ),
encode = "multipart")
httr::stop_for_status(r, task = httr::content(r)$message)
- structure(jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE)$data$files[[1L]], class = "dataverse_file")
-}
+ structure(jsonlite::fromJSON(
+ httr::content(r, as = "text", encoding = "UTF-8"),
+ simplifyDataFrame = FALSE)$data$files[[1L]], class = "dataverse_file"
+ )
+ }
diff --git a/R/get_dataframe.R b/R/get_dataframe.R
index c8c9366..bba2d9b 100644
--- a/R/get_dataframe.R
+++ b/R/get_dataframe.R
@@ -84,15 +84,13 @@
#'
#' # 3. RData files are read in by `base::load()` but cannot be assigned to an
#' # object name. The following shows two possible ways to read in such files.
+#' # First, the RData object can be loaded to the environment without object assignment.
#'
-#' # First, without relying on `get_dataframe_*`, write as a binary file:
-#' as_binary <- get_file_by_doi(
-#' filedoi = "doi:10.70122/FK2/PPIAXE/5VPXKE",
-#' server = "demo.dataverse.org")
-#'
-#' temp <- tempdir()
-#' writeBin(as_binary, path(temp, "county.RData"))
-#' load(path(temp, "county.RData"))
+#' get_dataframe_by_doi(
+#' filedoi = "10.70122/FK2/PPIAXE/X2FC5V",
+#' server = "demo.dataverse.org",
+#' original = TRUE,
+#' .f = function(x) load(x, envir = .GlobalEnv))
#'
#' # If you are certain each RData contains only one object, one could define a
#' # custom function used in https://stackoverflow.com/a/34926943
@@ -151,7 +149,7 @@ get_dataframe_by_id <- function(
}
# READ raw data
- raw <- get_file(file = fileid, original = original, ...)
+ raw <- get_file(file = fileid, original = original, return_url = FALSE, ...)
# save to temp and then read it in with supplied function
if (!is.null(.f)) {
diff --git a/R/get_file.R b/R/get_file.R
index 37f9c68..cf2fd92 100644
--- a/R/get_file.R
+++ b/R/get_file.R
@@ -85,6 +85,7 @@ get_file <- function(
dataset = NULL,
format = c("original", "bundle"),
vars = NULL,
+ return_url = FALSE,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
original = TRUE,
@@ -124,6 +125,7 @@ get_file <- function(
key = key,
server = server,
original = original,
+ return_url = return_url,
...
)
}
@@ -148,6 +150,7 @@ get_file_by_name <- function(
dataset,
format = c("original", "bundle"),
vars = NULL,
+ return_url = FALSE,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
original = TRUE,
@@ -171,6 +174,7 @@ get_file_by_name <- function(
key = key,
server = server,
original = original,
+ return_url = return_url,
...
)
}
diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R
index d6710e1..3c24c85 100644
--- a/R/get_file_by_id.R
+++ b/R/get_file_by_id.R
@@ -8,6 +8,8 @@
#' @param progress Whether to show a progress bar of the download.
#' If not specified, will be set to `TRUE` for a file larger than 100MB. To fix
#' a value, set `FALSE` or `TRUE`.
+#' @param return_url Instead of downloading the file, return the URL for download.
+#' Defaults to `FALSE`.
#'
#' @export
get_file_by_id <- function(
@@ -17,6 +19,7 @@ get_file_by_id <- function(
vars = NULL,
original = TRUE,
progress = NULL,
+ return_url = FALSE,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
@@ -92,7 +95,9 @@ get_file_by_id <- function(
# If not bundle, request single file in non-bundle format ----
u <- paste0(api_url(server), u_part, fileid)
-
+ if (return_url) {
+ return(httr::modify_url(u, query = query))
+ }
if (isFALSE(progress))
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query, ...)
@@ -117,6 +122,7 @@ get_file_by_doi <- function(
format = c("original", "bundle"),
vars = NULL,
original = TRUE,
+ return_url = FALSE,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
@@ -129,6 +135,7 @@ get_file_by_doi <- function(
key = key,
server = server,
original = original,
+ return_url = return_url,
...
)
}
diff --git a/R/get_url.R b/R/get_url.R
new file mode 100644
index 0000000..b45d8bb
--- /dev/null
+++ b/R/get_url.R
@@ -0,0 +1,133 @@
+#' @title Get Dataverse file download URL
+#'
+#' @description Get URL of associated file. `get_url_*` functions return a URL as
+#' a string. This can then be used in other functions such as `curl::curl_download()`.
+#'
+#' @details
+#' This function does not download the associated data.
+#' In contrast, `get_dataframe()` downloads the requested file to a tempfile, and then uses R
+#' to read it. And `get_file(.., return_url = FALSE)` reads the binary file into
+#' R's memory with `httr::GET()`. `get_url()` simply returns the URL for download.
+#'
+#'
+#' @inheritParams get_file
+#' @name URLs
+#'
+#' @returns A string or a list of strings that are URLs.
+#' @export
+#' @examples \dontrun{
+#' # get URLs
+#' get_url_by_name(
+#' filename = "nlsw88.tab",
+#' dataset = "10.70122/FK2/PPIAXE",
+#' server = "demo.dataverse.org"
+#' )
+#' # https://demo.dataverse.org/api/access/datafile/1734017?format=original
+#'
+#' # For ingested, tab-delimited files
+#' get_url_by_name(
+#' filename = "nlsw88.tab",
+#' dataset = "10.70122/FK2/PPIAXE",
+#' original = FALSE,
+#' server = "demo.dataverse.org"
+#' )
+#' # https://demo.dataverse.org/api/access/datafile/1734017
+#'
+#' # To download to local directory
+#' curl::curl_download(
+#' "https://demo.dataverse.org/api/access/datafile/1734017?format=original",
+#' destfile = "nlsw88.dta")
+#' }
+get_url <- function(
+    file,
+    dataset = NULL,
+    format = c("original", "bundle"),
+    key = Sys.getenv("DATAVERSE_KEY"),
+    server = Sys.getenv("DATAVERSE_SERVER"),
+    original = TRUE,
+    ...) {
+  # Forward under get_file()'s formal name `file`; return_url = TRUE asks for the URL only.
+  get_file(
+    file = file,
+    dataset = dataset,
+    format = format,
+    key = key,
+    server = server,
+    return_url = TRUE,
+    original = original,
+    ...
+  )
+}
+
+#' @rdname URLs
+#' @export
+get_url_by_name <- function(
+    filename,
+    dataset,
+    format = c("original", "bundle"),
+    key = Sys.getenv("DATAVERSE_KEY"),
+    server = Sys.getenv("DATAVERSE_SERVER"),
+    original = TRUE,
+    ...
+) {
+  format <- match.arg(format)
+  # Delegate to get_file_by_name(), asking for the download URL instead of the file.
+  get_file_by_name(
+    filename = filename,
+    dataset = dataset,
+    return_url = TRUE,
+    format = format,
+    original = original,
+    key = key,
+    server = server,
+    ...
+  )
+}
+
+#' @rdname URLs
+#' @export
+get_url_by_id <- function(
+    fileid,
+    dataset = NULL,
+    format = c("original", "bundle"),
+    key = Sys.getenv("DATAVERSE_KEY"),
+    server = Sys.getenv("DATAVERSE_SERVER"),
+    original = TRUE,
+    ...) {
+  # Validate `format`, then forward the caller's `dataset` and `format` rather than defaults.
+  format <- match.arg(format)
+  get_file_by_id(
+    fileid,
+    dataset = dataset,
+    format = format,
+    key = key,
+    server = server,
+    original = original,
+    return_url = TRUE,
+    ...
+  )
+}
+
+#' @rdname URLs
+#' @export
+get_url_by_doi <- function(
+    filedoi,
+    dataset = NULL,
+    format = c("original", "bundle"),
+    key = Sys.getenv("DATAVERSE_KEY"),
+    server = Sys.getenv("DATAVERSE_SERVER"),
+    original = TRUE,
+    ...) {
+  # Validate `format`, then forward the caller's `dataset` and `format` rather than defaults.
+  format <- match.arg(format)
+  get_file_by_doi(
+    filedoi,
+    dataset = dataset,
+    format = format,
+    key = key,
+    server = server,
+    original = original,
+    return_url = TRUE,
+    ...
+  )
+}
diff --git a/README.Rmd b/README.Rmd
index ceabc54..05e5f5b 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -12,10 +12,10 @@ Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
[![CRAN Version](https://www.r-pkg.org/badges/version/dataverse)](https://cran.r-project.org/package=dataverse)
![Downloads](https://cranlogs.r-pkg.org/badges/dataverse)
-[![R-CMD-check-thorough](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml/badge.svg?branch=master)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml)
-[![R-CMD-check-daily](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml/badge.svg?branch=master)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml)
+[![R-CMD-check-thorough](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml/badge.svg?branch=main)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml)
+[![R-CMD-check-daily](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml/badge.svg?branch=main)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml)
[![R-CMD-check-dev](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-dev.yaml/badge.svg)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-dev.yaml)
-[![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=master)](https://codecov.io/github/IQSS/dataverse-client-r?branch=master)
+[![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=main)](https://app.codecov.io/github/IQSS/dataverse-client-r?branch=main)
[![Dataverse Project logo](https://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png)](https://dataverse.org)
@@ -50,7 +50,7 @@ For features that require a Dataverse account for the specific server installati
DATAVERSE_KEY="examplekey12345"
```
-in your .Renviron file, where `examplekey12345` should be replace with your own key. The environment file can be opened by `usethis::edit_r_environ()`.
+in your .Renviron file, where `examplekey12345` should be replaced with your own key. The environment file can be opened by `usethis::edit_r_environ()`.
#### Server
@@ -208,7 +208,7 @@ Functions related to user management and permissions are currently not exported
Dataverse clients in other programming languages include [pyDataverse](https://pydataverse.readthedocs.io/en/latest/) for Python and the [Java client](https://github.com/IQSS/dataverse-client-java). For more information, see [the Dataverse API page](https://guides.dataverse.org/en/5.5/api/client-libraries.html#r).
-Users interested in downloading metadata from archives other than Dataverse may be interested in Kurt Hornik's [OAIHarvester](https://cran.r-project.org/package=OAIHarvester) and Scott Chamberlain's [oai](https://cran.r-project.org/package=oai), which offer metadata download from any web repository that is compliant with the [Open Archives Initiative](http://www.openarchives.org/) standards. Additionally, [rdryad](https://cran.r-project.org/package=rdryad) uses OAIHarvester to interface with [Dryad](https://datadryad.org/stash). The [rfigshare](https://cran.r-project.org/package=rfigshare) package works in a similar spirit to **dataverse** with .
+Users interested in downloading metadata from archives other than Dataverse may be interested in Kurt Hornik's [OAIHarvester](https://cran.r-project.org/package=OAIHarvester) and Scott Chamberlain's [oai](https://cran.r-project.org/package=oai), which offer metadata download from any web repository that is compliant with the [Open Archives Initiative](https://www.openarchives.org:443/) standards. Additionally, [rdryad](https://cran.r-project.org/package=rdryad) uses OAIHarvester to interface with [Dryad](https://datadryad.org/stash). The [rfigshare](https://cran.r-project.org/package=rfigshare) package works in a similar spirit to **dataverse** with .
### More Information
diff --git a/README.md b/README.md
index ec77188..4433f16 100644
--- a/README.md
+++ b/README.md
@@ -5,10 +5,10 @@ R Client for Dataverse Repositories
Version](https://www.r-pkg.org/badges/version/dataverse)](https://cran.r-project.org/package=dataverse)
![Downloads](https://cranlogs.r-pkg.org/badges/dataverse)
-[![R-CMD-check-thorough](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml/badge.svg?branch=master)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml)
-[![R-CMD-check-daily](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml/badge.svg?branch=master)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml)
+[![R-CMD-check-thorough](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml/badge.svg?branch=main)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-thorough.yaml)
+[![R-CMD-check-daily](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml/badge.svg?branch=main)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-daily.yaml)
[![R-CMD-check-dev](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-dev.yaml/badge.svg)](https://github.com/IQSS/dataverse-client-r/actions/workflows/R-CMD-check-dev.yaml)
-[![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=master)](https://codecov.io/github/IQSS/dataverse-client-r?branch=master)
+[![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=main)](https://app.codecov.io/github/IQSS/dataverse-client-r?branch=main)
[![Dataverse Project
logo](https://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png)](https://dataverse.org)
@@ -54,7 +54,7 @@ variable called `DATAVERSE_KEY`. It can be set as a default by adding
DATAVERSE_KEY="examplekey12345"
```
-in your .Renviron file, where `examplekey12345` should be replace with
+in your .Renviron file, where `examplekey12345` should be replaced with
your own key. The environment file can be opened by
`usethis::edit_r_environ()`.
@@ -285,9 +285,10 @@ Dataverse may be interested in Kurt Hornik’s
[OAIHarvester](https://cran.r-project.org/package=OAIHarvester) and
Scott Chamberlain’s [oai](https://cran.r-project.org/package=oai), which
offer metadata download from any web repository that is compliant with
-the [Open Archives Initiative](http://www.openarchives.org/) standards.
-Additionally, [rdryad](https://cran.r-project.org/package=rdryad) uses
-OAIHarvester to interface with [Dryad](https://datadryad.org/stash). The
+the [Open Archives Initiative](https://www.openarchives.org:443/)
+standards. Additionally,
+[rdryad](https://cran.r-project.org/package=rdryad) uses OAIHarvester to
+interface with [Dryad](https://datadryad.org/stash). The
[rfigshare](https://cran.r-project.org/package=rfigshare) package works
in a similar spirit to **dataverse** with .
diff --git a/cran-comments.md b/cran-comments.md
index f9cc69c..1083abd 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,7 +1,7 @@
Description
-----------------------------------------------
-This version fixes an outdated token error reported on March 2023. No other changes were made.
+This version (0.3.14) removes remote resources from vignette per CRAN policy, and makes several other usability improvements.
Shiro Kuriwaki
@@ -9,15 +9,14 @@ Shiro Kuriwaki
Test environments
-----------------------------------------------
-1. [win-builder](https://win-builder.r-project.org/QhQR4q21BLc1), development version (`devtools::check_win_devel()`)
-2. [R-hub](https://builder.r-hub.io/status/dataverse_0.3.13.tar.gz-a910246e058d4fdea677a3e29278dfbf).
-3. [GitHub Actions](https://github.com/IQSS/dataverse-client-r/actions)
+1. [win-builder](https://win-builder.r-project.org/FQhpeR7xF2O1), development version (`devtools::check_win_devel()`)
+2. [GitHub Actions](https://github.com/IQSS/dataverse-client-r/actions/runs/9054158346)
* os: macOS-latest , r: 'release'
* os: windows-latest, r: 'devel'
* os: windows-latest, r: 'release'
* os: ubuntu-20.04 , r: 'devel'
* os: ubuntu-20.04 , r: 'release'
-4. Local macOS, R 4.2.1
+3. Local macOS, R 4.3.3
R CMD check results
diff --git a/for-developers/developer-tasks.R b/for-developers/developer-tasks.R
index 431229a..477864d 100644
--- a/for-developers/developer-tasks.R
+++ b/for-developers/developer-tasks.R
@@ -34,6 +34,6 @@ devtools::check( # Equivalent of R-hub
remote = TRUE,
incoming = TRUE
)
-# devtools::check_rhub(email = "shirokuriwaki@gmail.com", env_vars = c(R_COMPILE_AND_INSTALL_PACKAGES = "always"))
-# devtools::check_win_devel() # CRAN submission policies encourage the development version
+devtools::check_rhub(email = "shirokuriwaki@gmail.com")
+devtools::check_win_devel() # CRAN submission policies encourage the development version
# devtools::release(check = FALSE) # Careful, the last question ultimately uploads it to CRAN, where you can't delete/reverse your decision.
diff --git a/inst/constants.yml b/inst/constants.yml
index 720b209..4266efe 100644
--- a/inst/constants.yml
+++ b/inst/constants.yml
@@ -1,4 +1,4 @@
server: "demo.dataverse.org"
-api_token: "c0d39fb3-ac48-4db7-9781-390073440495"
-api_token_expiration: "2024-03-22"
+api_token: "15372813-c54f-471f-a3e8-c269ee6a610f"
+api_token_expiration: "2025-05-10"
api_token_name: "shirokuriwaki"
diff --git a/man/URLs.Rd b/man/URLs.Rd
new file mode 100644
index 0000000..822b272
--- /dev/null
+++ b/man/URLs.Rd
@@ -0,0 +1,139 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_url.R
+\name{URLs}
+\alias{URLs}
+\alias{get_url}
+\alias{get_url_by_name}
+\alias{get_url_by_id}
+\alias{get_url_by_doi}
+\title{Get Dataverse file download URL}
+\usage{
+get_url(
+ file,
+ dataset = NULL,
+ format = c("original", "bundle"),
+ key = Sys.getenv("DATAVERSE_KEY"),
+ server = Sys.getenv("DATAVERSE_SERVER"),
+ original = TRUE,
+ ...
+)
+
+get_url_by_name(
+ filename,
+ dataset,
+ format = c("original", "bundle"),
+ key = Sys.getenv("DATAVERSE_KEY"),
+ server = Sys.getenv("DATAVERSE_SERVER"),
+ original = TRUE,
+ ...
+)
+
+get_url_by_id(
+ fileid,
+ dataset = NULL,
+ format = c("original", "bundle"),
+ key = Sys.getenv("DATAVERSE_KEY"),
+ server = Sys.getenv("DATAVERSE_SERVER"),
+ original = TRUE,
+ ...
+)
+
+get_url_by_doi(
+ filedoi,
+ dataset = NULL,
+ format = c("original", "bundle"),
+ key = Sys.getenv("DATAVERSE_KEY"),
+ server = Sys.getenv("DATAVERSE_SERVER"),
+ original = TRUE,
+ ...
+)
+}
+\arguments{
+\item{file}{An integer specifying a file identifier; or a vector of integers
+specifying file identifiers; or, if used with the prefix \code{"doi:"}, a
+character with the file-specific DOI; or, if used without the prefix, a
+filename accompanied by a dataset DOI in the \code{dataset} argument, or an object of
+class \dQuote{dataverse_file} as returned by \code{\link{dataset_files}}.
+Can be a vector for multiple files.}
+
+\item{dataset}{A character specifying a persistent identification ID for a dataset,
+for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class
+\dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.}
+
+\item{format}{A character string specifying a file format for download.
+by default, this is \dQuote{original} (the original file format). If \code{NULL},
+no query is added, so ingested files are returned in their ingested TSV form.
+For tabular datasets, the option \dQuote{bundle} downloads the bundle
+of the original and archival versions, as well as the documentation.
+See \url{https://guides.dataverse.org/en/latest/api/dataaccess.html} for details.}
+
+\item{key}{A character string specifying a Dataverse server API key. If one
+is not specified, functions calling authenticated API endpoints will fail.
+Keys can be specified atomically or globally using
+\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.}
+
+\item{server}{A character string specifying a Dataverse server.
+Multiple Dataverse installations exist, with \code{"dataverse.harvard.edu"} being the
+most major. The server can be defined each time within a function, or it can
+be set as a default via an environment variable. To set a default, run
+\code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")}
+or add \verb{DATAVERSE_SERVER = "dataverse.harvard.edu} in one's \code{.Renviron}
+file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.}
+
+\item{original}{A logical, defaulting to TRUE. If a ingested (.tab) version is
+available, download the original version instead of the ingested? If there was
+no ingested version, is set to NA. Note in \verb{get_dataframe_*},
+\code{original} is set to FALSE by default. Either can be changed.}
+
+\item{...}{Additional arguments passed to an HTTP request function, such as
+\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or
+\code{\link[httr]{DELETE}}.}
+
+\item{filename}{Filename of the dataset, with file extension as shown in Dataverse
+(for example, if nlsw88.dta was the original but is displayed as the ingested
+nlsw88.tab, use the ingested version.)}
+
+\item{fileid}{A numeric ID internally used for \code{get_file_by_id}. Can be a vector for multiple files.}
+
+\item{filedoi}{A DOI for a single file (not the entire dataset), of the form
+\code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}.
+Can be a vector for multiple files.}
+}
+\value{
+A string or a list of strings that are URLs.
+}
+\description{
+Get URL of associated file. \verb{get_url_*} functions return a URL as
+a string. This can then be used in other functions such as \code{curl::curl_download()}.
+}
+\details{
+This function does not download the associated data.
+In contrast, \code{get_dataframe()} downloads the requested file to a tempfile, and then uses R
+to read it. And \code{get_file(.., return_url = FALSE)} reads the binary file into
+R's memory with \code{httr::GET()}. \code{get_url()} simply returns the URL for download.
+}
+\examples{
+\dontrun{
+# get URLs
+get_url_by_name(
+ filename = "nlsw88.tab",
+ dataset = "10.70122/FK2/PPIAXE",
+ server = "demo.dataverse.org"
+)
+# https://demo.dataverse.org/api/access/datafile/1734017?format=original
+
+# For ingested, tab-delimited files
+get_url_by_name(
+ filename = "nlsw88.tab",
+ dataset = "10.70122/FK2/PPIAXE",
+ original = FALSE,
+ server = "demo.dataverse.org"
+)
+# https://demo.dataverse.org/api/access/datafile/1734017
+
+# To download to local directory
+curl::curl_download(
+ "https://demo.dataverse.org/api/access/datafile/1734017?format=original",
+ destfile = "nlsw88.dta")
+}
+}
diff --git a/man/files.Rd b/man/files.Rd
index 67e017f..691ac3d 100644
--- a/man/files.Rd
+++ b/man/files.Rd
@@ -12,6 +12,7 @@ get_file(
dataset = NULL,
format = c("original", "bundle"),
vars = NULL,
+ return_url = FALSE,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
original = TRUE,
@@ -23,6 +24,7 @@ get_file_by_name(
dataset,
format = c("original", "bundle"),
vars = NULL,
+ return_url = FALSE,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
original = TRUE,
@@ -36,6 +38,7 @@ get_file_by_id(
vars = NULL,
original = TRUE,
progress = NULL,
+ return_url = FALSE,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
@@ -47,6 +50,7 @@ get_file_by_doi(
format = c("original", "bundle"),
vars = NULL,
original = TRUE,
+ return_url = FALSE,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
@@ -74,6 +78,9 @@ See \url{https://guides.dataverse.org/en/latest/api/dataaccess.html} for details
\item{vars}{A character vector specifying one or more variable names, used to
extract a subset of the data.}
+\item{return_url}{Instead of downloading the file, return the URL for download.
+Defaults to \code{FALSE}.}
+
\item{key}{A character string specifying a Dataverse server API key. If one
is not specified, functions calling authenticated API endpoints will fail.
Keys can be specified atomically or globally using
diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd
index 3001f3d..a4a2f90 100644
--- a/man/get_dataframe.Rd
+++ b/man/get_dataframe.Rd
@@ -64,6 +64,8 @@ be set as a default via an environment variable. To set a default, run
\code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")}
or add \verb{DATAVERSE_SERVER = "dataverse.harvard.edu} in one's \code{.Renviron}
file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.}
+ \item{\code{return_url}}{Instead of downloading the file, return the URL for download.
+Defaults to \code{FALSE}.}
}}
\item{fileid}{A numeric ID internally used for \code{get_file_by_id}. Can be a vector for multiple files.}
@@ -140,15 +142,13 @@ if (requireNamespace("haven", quietly = TRUE)) {
# 3. RData files are read in by `base::load()` but cannot be assigned to an
# object name. The following shows two possible ways to read in such files.
+# First, the RData object can be loaded to the environment without object assignment.
-# First, without relying on `get_dataframe_*`, write as a binary file:
-as_binary <- get_file_by_doi(
- filedoi = "doi:10.70122/FK2/PPIAXE/5VPXKE",
- server = "demo.dataverse.org")
-
-temp <- tempdir()
-writeBin(as_binary, path(temp, "county.RData"))
-load(path(temp, "county.RData"))
+get_dataframe_by_doi(
+ filedoi = "10.70122/FK2/PPIAXE/X2FC5V",
+ server = "demo.dataverse.org",
+ original = TRUE,
+ .f = function(x) load(x, envir = .GlobalEnv))
# If you are certain each RData contains only one object, one could define a
# custom function used in https://stackoverflow.com/a/34926943
diff --git a/tests/B-search_ghaction.R b/tests/B-search_ghaction.R
new file mode 100644
index 0000000..170dd98
--- /dev/null
+++ b/tests/B-search_ghaction.R
@@ -0,0 +1,22 @@
+## ----knitr_options, echo=FALSE, results="hide"----------------------------------------------
+options(width = 120)
+knitr::opts_chunk$set(results = "hold")
+
+
+## -------------------------------------------------------------------------------------------
+library("dataverse")
+Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
+dataverse_search("Gary King")[c("name")]
+
+
+## -------------------------------------------------------------------------------------------
+dataverse_search("Gary King", start = 6, per_page = 20)[c("name")]
+
+
+## -------------------------------------------------------------------------------------------
+ei <- dataverse_search(author = "Gary King", title = "Ecological Inference", type = "dataset", per_page = 20)
+# fields returned
+names(ei)
+# names of datasets
+ei$name
+
diff --git a/tests/C-download_ghaction.R b/tests/C-download_ghaction.R
new file mode 100644
index 0000000..592f040
--- /dev/null
+++ b/tests/C-download_ghaction.R
@@ -0,0 +1,76 @@
+## ----knitr_options, echo=FALSE, results="hide"----------------------------------------------
+options(width = 120)
+knitr::opts_chunk$set(results = "hold")
+
+
+## -------------------------------------------------------------------------------------------
+Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
+
+
+## -------------------------------------------------------------------------------------------
+library("dataverse")
+library("tibble") # to see dataframes in tidyverse-form
+
+
+## ----echo=FALSE, message=FALSE,include=FALSE------------------------------------------------
+energy <- get_dataframe_by_name(
+ filename = "comprehensiveJapanEnergy.tab",
+ dataset = "10.7910/DVN/ARKOTI",
+ server = "dataverse.harvard.edu")
+
+
+## ----eval=FALSE-----------------------------------------------------------------------------
+## energy <- get_dataframe_by_name(
+## filename = "comprehensiveJapanEnergy.tab",
+## dataset = "10.7910/DVN/ARKOTI",
+## server = "dataverse.harvard.edu")
+
+
+## -------------------------------------------------------------------------------------------
+head(energy)
+
+
+## -------------------------------------------------------------------------------------------
+library(readr)
+energy <- get_dataframe_by_name(
+ filename = "comprehensiveJapanEnergy.tab",
+ dataset = "10.7910/DVN/ARKOTI",
+ server = "dataverse.harvard.edu",
+ .f = function(x) read.delim(x, sep = "\t"))
+
+head(energy)
+
+
+## ----message=FALSE--------------------------------------------------------------------------
+argentina_tab <- get_dataframe_by_name(
+ filename = "alpl2013.tab",
+ dataset = "10.7910/DVN/ARKOTI",
+ server = "dataverse.harvard.edu")
+
+
+## -------------------------------------------------------------------------------------------
+str(argentina_tab$polling_place)
+
+
+## -------------------------------------------------------------------------------------------
+argentina_dta <- get_dataframe_by_name(
+ filename = "alpl2013.tab",
+ dataset = "10.7910/DVN/ARKOTI",
+ server = "dataverse.harvard.edu",
+ original = TRUE,
+ .f = haven::read_dta)
+
+
+## -------------------------------------------------------------------------------------------
+str(argentina_dta$polling_place)
+
+
+## -------------------------------------------------------------------------------------------
+str(dataset_metadata("10.7910/DVN/ARKOTI", server = "dataverse.harvard.edu"),
+ max.level = 2)
+
+
+## ----eval = FALSE---------------------------------------------------------------------------
+## code3 <- get_file("chapter03.R", "doi:10.7910/DVN/ARKOTI", server = "dataverse.harvard.edu")
+## writeBin(code3, "chapter03.R")
+
diff --git a/tests/testthat/tests-dataset_files.R b/tests/testthat/tests-dataset_files.R
index 6da8dd3..aca8104 100644
--- a/tests/testthat/tests-dataset_files.R
+++ b/tests/testthat/tests-dataset_files.R
@@ -1,5 +1,5 @@
# See https://demo.dataverse.org/dataverse/dataverse-client-r
-# https://doi.org/10.70122/FK2/HXJVJU
+# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU
test_that("download tab from DOI and filename", {
# testthat::skip_if_offline("demo.dataverse.org")
diff --git a/tests/testthat/tests-dataset_metadata.R b/tests/testthat/tests-dataset_metadata.R
index 9e90e8f..2fcdfa7 100644
--- a/tests/testthat/tests-dataset_metadata.R
+++ b/tests/testthat/tests-dataset_metadata.R
@@ -1,5 +1,5 @@
# See https://demo.dataverse.org/dataverse/dataverse-client-r
-# https://doi.org/10.70122/FK2/HXJVJU
+# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU
test_that("check metadata format", {
# testthat::skip_if_offline("demo.dataverse.org")
@@ -22,6 +22,6 @@ test_that("check versions format", {
ds_index <- which(sapply(contents, function(x) x$identifier) == "FK2/HXJVJU")
actual <- dataset_versions(contents[[ds_index]])
- expect_length(actual[[1]], 15L)
+ expect_length(actual[[1]], 17L)
expect_s3_class(actual[[2]], "dataverse_dataset_version")
})
diff --git a/tests/testthat/tests-get_dataframe-dataframe-basketball.R b/tests/testthat/tests-get_dataframe-dataframe-basketball.R
index 52e6c86..e963b29 100644
--- a/tests/testthat/tests-get_dataframe-dataframe-basketball.R
+++ b/tests/testthat/tests-get_dataframe-dataframe-basketball.R
@@ -1,5 +1,5 @@
# See https://demo.dataverse.org/dataverse/dataverse-client-r
-# https://doi.org/10.70122/FK2/HXJVJU
+# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU
test_that("roster-by-name", {
# testthat::skip_if_offline("demo.dataverse.org")
@@ -48,18 +48,11 @@ test_that("load-rdata", {
# testthat::skip_if_offline("demo.dataverse.org")
testthat::skip_on_cran()
- # https://stackoverflow.com/a/34926943
- f_load_rda <- function(file) {
- tmp <- new.env()
- load(file = file, envir = tmp)
- tmp[[ls(tmp)[1]]]
- }
-
- from_rda <- get_dataframe_by_id(
+ get_dataframe_by_id(
file = 1939003,
server = "demo.dataverse.org",
- .f = f_load_rda,
- original = TRUE)
+ original = TRUE,
+ .f = function(x) load(x, envir = .GlobalEnv))
- expect_s3_class(from_rda, "tbl")
+ expect_s3_class(nlsw88, "tbl")
})
diff --git a/tests/testthat/tests-get_dataframe-original-basketball.R b/tests/testthat/tests-get_dataframe-original-basketball.R
index df91f6a..c2d3a61 100644
--- a/tests/testthat/tests-get_dataframe-original-basketball.R
+++ b/tests/testthat/tests-get_dataframe-original-basketball.R
@@ -1,5 +1,5 @@
# See https://demo.dataverse.org/dataverse/dataverse-client-r
-# https://doi.org/10.70122/FK2/HXJVJU
+# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU
# standarize_string <- function (x) {
# substring(x, 1, 10)
diff --git a/tests/testthat/tests-get_dataset.R b/tests/testthat/tests-get_dataset.R
index cc9b9cc..212b985 100644
--- a/tests/testthat/tests-get_dataset.R
+++ b/tests/testthat/tests-get_dataset.R
@@ -1,5 +1,5 @@
# See https://demo.dataverse.org/dataverse/dataverse-client-r
-# https://doi.org/10.70122/FK2/HXJVJU
+# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU
test_that("download tab from DOI and filename", {
# testthat::skip_if_offline("demo.dataverse.org")
@@ -11,7 +11,7 @@ test_that("download tab from DOI and filename", {
files <- actual$files
expected_dv <- retrieve_info_dataverse("expected-dataverse.yml")
- expect_length(actual , 15L)
+ expect_length(actual , 17L)
expect_equal(actual$id , 182158L)
expect_equal(actual$datasetId , 1734004L)
expect_equal(actual$datasetPersistentId , "doi:10.70122/FK2/HXJVJU")
@@ -21,17 +21,17 @@ test_that("download tab from DOI and filename", {
expect_equal(actual$license$name , "CC0 1.0")
expect_equal(nrow(files) , 2L)
- expect_equal(ncol(files) , 22L)
+ expect_equal(ncol(files) , 26L)
- expect_equal(files$label , c("roster-bulls-1996.tab", "vector-basketball.svg"))
- expect_equal(files$restricted , c(FALSE, FALSE))
- expect_equal(files$version , c(3L, 2L))
- expect_equal(files$datasetVersionId , c(actual$id, actual$id))
- expect_equal(files$directoryLabel , c(NA, "resources"))
- expect_equal(files$id , c(1734005L, 1734006L))
- expect_equal(files$persistentId , c("doi:10.70122/FK2/HXJVJU/SA3Z2V", "doi:10.70122/FK2/HXJVJU/FHV8ZB"))
- expect_equal(files$pidURL , c("https://doi.org/10.70122/FK2/HXJVJU/SA3Z2V", "https://doi.org/10.70122/FK2/HXJVJU/FHV8ZB"))
- expect_equal(files$filename , c("roster-bulls-1996.tab", "vector-basketball.svg"))
+ expect_setequal(files$label , c("roster-bulls-1996.tab", "vector-basketball.svg"))
+ expect_setequal(files$restricted , c(FALSE, FALSE))
+ expect_setequal(files$version , c(3L, 2L))
+ expect_setequal(files$datasetVersionId , c(actual$id, actual$id))
+ expect_setequal(files$directoryLabel , c(NA, "resources"))
+ expect_setequal(files$id , c(1734005L, 1734006L))
+ expect_setequal(files$persistentId , c("doi:10.70122/FK2/HXJVJU/SA3Z2V", "doi:10.70122/FK2/HXJVJU/FHV8ZB"))
+ expect_setequal(files$pidURL , c("https://doi.org/10.70122/FK2/HXJVJU/SA3Z2V", "https://doi.org/10.70122/FK2/HXJVJU/FHV8ZB"))
+ expect_setequal(files$filename , c("roster-bulls-1996.tab", "vector-basketball.svg"))
# expect_equal(files$description , c(NA, "CC-0-from-https://publicdomainvectors.org/en/free-clipart/Basketball-vector-symbol/69448.html"))
})
diff --git a/tests/testthat/tests-get_file.R b/tests/testthat/tests-get_file.R
index 8bad37e..fe60747 100644
--- a/tests/testthat/tests-get_file.R
+++ b/tests/testthat/tests-get_file.R
@@ -1,5 +1,5 @@
# See https://demo.dataverse.org/dataverse/dataverse-client-r
-# https://doi.org/10.70122/FK2/HXJVJU
+# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU
test_that("download tab from DOI and filename", {
# testthat::skip_if_offline("demo.dataverse.org")
diff --git a/tests/testthat/tests-get_file_metadata.R b/tests/testthat/tests-get_file_metadata.R
index 582990b..d129436 100644
--- a/tests/testthat/tests-get_file_metadata.R
+++ b/tests/testthat/tests-get_file_metadata.R
@@ -1,5 +1,5 @@
# See https://demo.dataverse.org/dataverse/dataverse-client-r
-# https://doi.org/10.70122/FK2/HXJVJU
+# https://demo.dataverse.org/dataset.xhtml?persistentId=doi:10.70122/FK2/HXJVJU
test_that("get file metadata from DOI and filename", {
# testthat::skip_if_offline("demo.dataverse.org")
diff --git a/tests/testthat/tests-get_url.R b/tests/testthat/tests-get_url.R
new file mode 100644
index 0000000..33a81b2
--- /dev/null
+++ b/tests/testthat/tests-get_url.R
@@ -0,0 +1,19 @@
+# get_file(return_url = TRUE) and get_url_by_name() return the download URL instead of the file
+test_that("Return just URL", {
+ testthat::skip_on_cran()
+ expect_equal(
+ get_file(c(1734005, 1734006),
+ server = "demo.dataverse.org",
+ original = TRUE,
+ return_url = TRUE),
+ list("https://demo.dataverse.org/api/access/datafile/1734005?format=original",
+ "https://demo.dataverse.org/api/access/datafile/1734006"))
+
+ expect_equal(
+ get_url_by_name(
+ filename = "nlsw88.tab",
+ dataset = "10.70122/FK2/PPIAXE",
+ server = "demo.dataverse.org"
+ ),
+ expected = "https://demo.dataverse.org/api/access/datafile/1734017?format=original")
+})
diff --git a/vignettes/B-search.Rmd b/vignettes/B-search.Rmd
index cf363c6..458d927 100644
--- a/vignettes/B-search.Rmd
+++ b/vignettes/B-search.Rmd
@@ -17,21 +17,52 @@ knitr::opts_chunk$set(results = "hold")
Searching for data within Dataverse is quite easy using the `dataverse_search()` function. The simplest searches simply consist of a query string:
-```{r}
+```{r, eval=FALSE}
library("dataverse")
Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
dataverse_search("Gary King")[c("name")]
```
+```{r}
+## name
+## 1 004_informal_food_retail_Nigeria_2018.tab
+## 2 00592Belle-Stress-PaperData-Subject_King_ChildIs.PDF
+## 3 00592Belle-Stress-PaperData-Subject_King_ChildO.PDF
+## 4 00592Belle-Stress-PaperData-Subject_King_Coping.PDF
+## 5 00592Belle-Stress-PaperData-Subject_King_Discrimination.PDF
+## 6 00592Belle-Stress-PaperData-Subject_King_LifeCs.PDF
+## 7 00592Belle-Stress-PaperData-Subject_King_LifeE.PDF
+## 8 00592Belle-Stress-PaperData-Subject_KingAndMeunier_Parenting.PDF
+## 9 00698McArthur-King-BoxCoverSheets.pdf
+## 10 00698McArthur-King-MemoOfAgreement.pdf
+```
+
+
The results are paginated, so users can rely upon the `per_page` and `start` argument to requested subsequent pages of results. We'll start at 6 and to show that we retrieve the last five results from the previous query plus 15 more (due to `per_page = 20`):
-```{r}
+```{r, eval=FALSE}
dataverse_search("Gary King", start = 6, per_page = 20)[c("name")]
```
+```{r}
+## 10 of 3676 results retrieved
+## name
+## 1 004_informal_food_retail_Nigeria_2018.tab
+## 2 00698McArthur-King-BoxCoverSheets.pdf
+## 3 00698McArthur-King-MemoOfAgreement.pdf
+## 4 00698McArthur-King-StudyDescription.pdf
+## 5 01 ReadMe Unlocking history through automated virtual unfolding of sealed documents imaged by X-ray microtomography
+## 6 01_ReadMe_The_Spiral_Locked_Letters_of_Elizabeth_I_and_Mary_Queen_of_Scots
+## 7 03 Brienne Collection letterlocking data: Images folder 02/16, DB-0874_2–DB-0903
+## 8 03 Brienne Collection letterlocking data: Images folder 04/16, DB-0988–DB-1109_03
+## 9 03 Brienne Collection letterlocking data: Images folder 06/16, DB-1241_02–DB-1339_06
+## 10 03 Brienne Collection letterlocking data: Images folder 08/16, DB-1455_02–DB-1564_01
+```
+
+
More complicated searches can specify metadata fields like `title` and restrict results to a specific `type` of Dataverse object (a "dataverse", "dataset", or "file"):
-```{r}
+```{r, eval=FALSE}
ei <- dataverse_search(author = "Gary King", title = "Ecological Inference", type = "dataset", per_page = 20)
# fields returned
names(ei)
@@ -39,4 +70,39 @@ names(ei)
ei$name
```
+```{r}
+## [1] "name" "type" "url" "global_id"
+## [5] "description" "published_at" "publisher" "citationHtml"
+## [9] "identifier_of_dataverse" "name_of_dataverse" "citation" "storageIdentifier"
+## [13] "keywords" "subjects" "fileCount" "versionId"
+## [17] "versionState" "majorVersion" "minorVersion" "createdAt"
+## [21] "updatedAt" "contacts" "authors" "publications"
+## [1] "01 ReadMe Unlocking history through automated virtual unfolding of sealed documents imaged by X-ray microtomography"
+## [2] "01_ReadMe_The_Spiral_Locked_Letters_of_Elizabeth_I_and_Mary_Queen_of_Scots"
+## [3] "03 Brienne Collection letterlocking data: Images folder 02/16, DB-0874_2–DB-0903"
+## [4] "03 Brienne Collection letterlocking data: Images folder 04/16, DB-0988–DB-1109_03"
+## [5] "03 Brienne Collection letterlocking data: Images folder 06/16, DB-1241_02–DB-1339_06"
+## [6] "03 Brienne Collection letterlocking data: Images folder 08/16, DB-1455_02–DB-1564_01"
+## [7] "03 Brienne Collection letterlocking data: Images folder 12/16, DB-1868–DB-1963_03"
+## [8] "03 Brienne Collection letterlocking data: Images folder 14/16, DB-2064_01–2155_03"
+## [9] "03 Spiral-lock figures"
+## [10] "07 Letterlocking Categories and Formats Chart"
+## [11] "10 Foldable: Launch Little Book of Locks (UH6089), with Categories and Formats Chart. Letterlocking Instructional Resources"
+## [12] "10 Million International Dyadic Events"
+## [13] "1479 data points of covid19 policy response times"
+## [14] "2016 Census of Population: ADA and DA Maps for Kings County Nova Scotia"
+## [15] "3D Dust map from Green et al. (2015)"
+## [16] "3D dust map from Green et al. (2017)"
+## [17] "3D dust map from Green et al. (2019)"
+## [18] "A 1D Lyman-alpha Profile Camera for Plasma Edge Neutral Studies on the DIII-D Tokamak"
+## [19] "A Comparative Analysis of Brazil's Foreign Policy Drivers Towards the USA: Comment on Amorim Neto (2011)"
+## [20] "A Critique of Dyadic Design"
+## 16 1998 Jewish Community Study of the Coachella Valley, California
+## 17 2002 State Legislative Survey
+## 18 2007 White Sands Dune Field lidar topographic data
+## 19 2008 White Sands Dune Field lidar topographic data
+## 20 2012 STATA Data.tab
+
+```
+
Once datasets and files are identified, it is easy to download and use them directly in R. See the ["Data Download" vignette](C-download.html) for details.
diff --git a/vignettes/C-download.Rmd b/vignettes/C-download.Rmd
index 754f4d1..a5ba31d 100644
--- a/vignettes/C-download.Rmd
+++ b/vignettes/C-download.Rmd
@@ -42,7 +42,8 @@ library("tibble") # to see dataframes in tidyverse-form
First, we retrieve a plain-text file like this dataset on electricity consumption by [Wakiyama et al. (2014)](https://doi.org/10.7910/DVN/ARKOTI/GN1MRT). Taking the file name and dataset DOI from this entry,
-```{r, echo=FALSE, message=FALSE,include=FALSE}
+
+```{r, eval=FALSE}
energy <- get_dataframe_by_name(
filename = "comprehensiveJapanEnergy.tab",
dataset = "10.7910/DVN/ARKOTI",
@@ -50,20 +51,26 @@ energy <- get_dataframe_by_name(
```
```{r, eval=FALSE}
-energy <- get_dataframe_by_name(
- filename = "comprehensiveJapanEnergy.tab",
- dataset = "10.7910/DVN/ARKOTI",
- server = "dataverse.harvard.edu")
+head(energy)
```
```{r}
-head(energy)
+## # A tibble: 6 × 10
+## time date dummy temp temp2 all large house kepco tepco
+##
+## 1 1 8-Jan 0 5.9 34.8 95792389 35194957 26190714 13357735 26960899
+## 2 2 8-Feb 0 5.5 30.3 95156901 35322031 24224097 13315027 27189705
+## 3 3 8-Mar 0 10.7 114. 91034047 36474192 21391965 12805831 24495519
+## 4 4 8-Apr 0 14.7 216. 84087552 34949622 18494473 11494328 23540356
+## 5 5 8-May 0 18.5 342. 82742929 35417089 17923760 11589061 22848737
+## 6 6 8-Jun 0 21.3 454. 82180013 36692291 15205229 11360771 22487441
```
+
These `get_dataframe_*` functions, introduced in v0.3.0, directly read in the data into a R environment through whatever R function supplied by `.f`. The default of the `get_dataframe_*` functions is to read in such data by `readr::read_tsv()`. The `.f` function can be modified to modify the read-in settings. For example, the following modification is a base-R equivalent to read in the ingested data.
-```{r}
+```{r, eval=FALSE}
library(readr)
energy <- get_dataframe_by_name(
filename = "comprehensiveJapanEnergy.tab",
@@ -74,6 +81,16 @@ energy <- get_dataframe_by_name(
head(energy)
```
+```{r}
+## time date dummy temp temp2 all large house kepco tepco
+## 1 1 8-Jan 0 5.9 34.8 95792389 35194957 26190714 13357735 26960899
+## 2 2 8-Feb 0 5.5 30.3 95156901 35322031 24224097 13315027 27189705
+## 3 3 8-Mar 0 10.7 114.5 91034047 36474192 21391965 12805831 24495519
+## 4 4 8-Apr 0 14.7 216.1 84087552 34949622 18494473 11494328 23540356
+## 5 5 8-May 0 18.5 342.3 82742929 35417089 17923760 11589061 22848737
+## 6 6 8-Jun 0 21.3 453.7 82180013 36692291 15205229 11360771 22487441
+```
+
The dataverse package can also download datasets that are _drafts_ (i.e. versions not released publicly), as long as the user of the dataset provides their appropriate DATAVERSE_KEY. Users may need to modify the metadata of a datafile, such as adding a descriptive label, for the data downloading to work properly in this case. This is because the the file identifier UNF, which the read function relies on, may only appear after metadata has been added.
@@ -83,7 +100,7 @@ The dataverse package can also download datasets that are _drafts_ (i.e. version
If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. (2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [ingested](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format.
-```{r, message=FALSE}
+```{r, message=FALSE,eval=FALSE}
argentina_tab <- get_dataframe_by_name(
filename = "alpl2013.tab",
dataset = "10.7910/DVN/ARKOTI",
@@ -93,13 +110,17 @@ argentina_tab <- get_dataframe_by_name(
However, ingested files may not retain important dataset attributes. For example, Stata and SPSS datasets encode value labels on to numeric values. Factor variables in R dataframes encode levels, not only labels. A plain-text ingested file will discard such information. For example, the `polling_place` variable in this data is only given by numbers, although the original data labelled these numbers with informative values.
-```{r}
+```{r,eval=FALSE}
str(argentina_tab$polling_place)
```
+```{r}
+## num [1:1475] 31 31 31 31 31 31 31 31 31 31 ...
+```
+
When ingesting, Dataverse retains a `original` version that retains these attributes but may not be readable in some platforms. The `get_dataframe_*` functions have an argument that can be set to `original = TRUE`. In this case we know that `alpl2013.tab` was originally a Stata dta file, so we can run:
-```{r}
+```{r, eval=FALSE}
argentina_dta <- get_dataframe_by_name(
filename = "alpl2013.tab",
dataset = "10.7910/DVN/ARKOTI",
@@ -110,10 +131,17 @@ argentina_dta <- get_dataframe_by_name(
Now we see that labels are read in through `haven`'s labelled variables class:
-```{r}
+```{r, eval=FALSE}
str(argentina_dta$polling_place)
```
+```{r}
+## dbl+lbl [1:1475] 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 3...
+## @ label : chr "polling_place"
+## @ format.stata: chr "%9.0g"
+## @ labels : Named num [1:37] 1 2 3 4 5 6 7 8 9 10 ...
+## ..- attr(*, "names")= chr [1:37] "E.E.T." "Escuela Juan Bautista Alberdi" "Escuela Juan Carlos Dávalos" "Escuela Bernardino de Rivadavia" ...
+```
Users should pick `.f` and `original` based on their existing knowledge of the file. If the original file is a `.sav` SPSS file, `.f` can be `haven::read_sav`. If it is a `.Rds` file, use `readRDS` or `readr::read_rds`. In fact, because the raw data is read in as a binary, there is no limitation to the file types `get_dataframe_*` can read in, as far as the dataverse package is concerned.
@@ -138,11 +166,23 @@ This shows that there are indeed 32 files, a mix of .R code files and tab- and c
You can also retrieve more extensive metadata using `dataset_metadata()`:
-```{r}
+```{r, eval=FALSE}
str(dataset_metadata("10.7910/DVN/ARKOTI", server = "dataverse.harvard.edu"),
max.level = 2)
```
+```{r}
+## List of 3
+## $ displayName: chr "Citation Metadata"
+## $ name : chr "citation"
+## $ fields :'data.frame': 7 obs. of 4 variables:
+## ..$ typeName : chr [1:7] "title" "author" "datasetContact" "dsDescription" ...
+## ..$ multiple : logi [1:7] FALSE TRUE TRUE TRUE TRUE FALSE ...
+## ..$ typeClass: chr [1:7] "primitive" "compound" "compound" "compound" ...
+## ..$ value :List of 7
+```
+
+
## Retrieving Scripts and Other Files
If the file you want to retrieve is not data, you may want to use the more primitive function, `get_file`, which gets the file data as a raw binary file. See the help page examples of `get_file()` that use the `base::writeBin()` function for details on how to write and read these binary files instead.