diff --git a/.Rbuildignore b/.Rbuildignore index e9febb7..df4cd6c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,5 +8,6 @@ RSocrata.Rcheck ^.*\.Rproj$ ^\.Rproj\.user$ .DS_Store -README.md ^\.travis\.yml$ +appveyor.yml +CONTRIBUTING.md \ No newline at end of file diff --git a/.gitignore b/.gitignore index 408ac80..dccab3c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,16 +1,14 @@ -/.settings +/.git /.project +/.Rbuildignore.Rcheck /.README.md.html -/man -/.git -/RSocrata_*.tar.gz -/RCheck +/.settings +/DESCRIPTION.Rcheck /out +/RCheck +/RSocrata_*.tar.gz /RSocrata.Rcheck *.pdf -/DESCRIPTION.Rcheck -/.Rbuildignore.Rcheck -*.Rproj.user *.Rproj *.Rproj.user *.Rhistory diff --git a/.travis.yml b/.travis.yml index 2b480a7..d9a0eb2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,27 +2,23 @@ # # See README.md for instructions, or for more configuration options, # see the wiki: -# https://github.com/craigcitro/r-travis/wiki +# http://docs.travis-ci.com/user/languages/r/ -language: c +language: R +sudo: required +warnings_are_errors: true -env: - - R_MY_PKG="$(basename $TRAVIS_REPO_SLUG)" +r_github_packages: + - hadley/httr + - jeroenooms/jsonlite + - jeroenooms/curl + - klutometis/roxygen + - jimhester/covr -before_install: - - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh - - chmod 755 ./travis-tool.sh - - ./travis-tool.sh bootstrap -install: - - ./travis-tool.sh install_deps -script: - - ./travis-tool.sh run_tests - - Rscript -e 'source("R/tests/testRSocrata.R"); runAllTestsCI()' - -after_failure: - - ./travis-tool.sh dump_logs +after_success: + - Rscript -e 'library(covr);coveralls()' notifications: email: on_success: change - on_failure: change \ No newline at end of file + on_failure: change diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..d627bf5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,25 @@ + +# How to contribute + +We really appreciate when users [fix 
bugs](https://github.com/Chicago/RSocrata/pull/25) or [provide new features](https://github.com/Chicago/RSocrata/pull/21). When submitting changes, please read below to help the development team keep on top of issues and changes. + +## Submitting a bug + +If you notice something strange, please [submit an issue on GitHub](https://github.com/Chicago/RSocrata/issues). In the issue, please try to achieve the following: + +* Describe what you did +* Describe what happened when you did it +* Describe what you think should happen +* If possible, describe where you think the error is occurring + +If you have multiple issues, please submit multiple requests. Once you submit your report, we'll often engage in a conversation or give it a label to be fixed. + +## Making Changes + +When you want to make a change, either to fix a bug or introduce a new feature, please follow the instructions below + +* Create a branch or fork of the project based off of the `dev` branch. +* Make commits of logical units +* Add unit tests for any new features +* Run all tests in `tests/testthat/` +* Create a pull request with a robust description or [reference the issue number](https://github.com/Chicago/RSocrata/issues) \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 62ad2b5..7883771 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,25 +1,27 @@ Package: RSocrata +Type: Package Title: Download 'Socrata' Data Sets as R Data Frames Description: Provides easier interaction with - Socrata open data portals \url{http://dev.socrata.com}. + Socrata open data portals http://dev.socrata.com. Users can provide a 'Socrata' data set resource URL, or a 'Socrata' Open Data API (SoDA) web query, or a 'Socrata' "human-friendly" URL, returns an R data frame. Converts dates to 'POSIX' format. Manages throttling by 'Socrata'. 
-Version: 1.6.0-12 -Date: 2015-4-20 -URL: https://github.com/Chicago/RSocrata -BugReports: https://github.com/Chicago/RSocrata/issues +Version: 1.6.2-9 +Date: 2015-7-10 +Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., and John Malc +Maintainer: "Tom Schenk Jr." Depends: - httr (>= 0.3), - jsonlite (>= 0.9.14), - mime (>= 0.2) + R (>= 3.0.0) Imports: - curl (>= 0.5) + httr (>= 1.0.0), + jsonlite (>= 0.9.16), + mime (>= 0.3) Suggests: - RUnit -Author: Hugh Devlin, Ph. D. and Tom Schenk, Jr. -Maintainer: "Tom Schenk Jr." -License: MIT + file LICENSE \ No newline at end of file + testthat (>= 0.10.0), + roxygen2 (>= 4.1.0) +License: MIT + file LICENSE +URL: https://github.com/Chicago/RSocrata +BugReports: https://github.com/Chicago/RSocrata/issues diff --git a/NAMESPACE b/NAMESPACE index f995a38..08e66c5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,16 @@ +# Generated by roxygen2 (4.1.1): do not edit by hand + export(fieldName) +export(isFourByFour) +export(ls.socrata) export(posixify) export(read.socrata) -export(ls.socrata) -import(jsonlite) -import(httr) -import(mime) -import(curl) \ No newline at end of file +export(validateUrl) +importFrom(httr,GET) +importFrom(httr,build_url) +importFrom(httr,content) +importFrom(httr,http_status) +importFrom(httr,parse_url) +importFrom(httr,stop_for_status) +importFrom(jsonlite,fromJSON) +importFrom(mime,guess_type) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..a4fafee --- /dev/null +++ b/NEWS.md @@ -0,0 +1,45 @@ +### 1.1 +Add check for valid Socrata resource URL. Add check for supported download file format. Add support for Socrata short dates. + +### 1.2 +Use comma-separated file format for Socrata downloads. + +### 1.3 +* Added support for human-readable URL. Users can now copy and paste URLs of Socrata-hosted datasets, which will be transformed into a valid SoDA API web query. + +* Added additional RUnit tests to validate new functionality. + +### 1.4 +Add json file format for Socrata downloads. 
Switch to `RJSONIO` from `rjson`. + +### 1.5 Several changes: + +* Swapped to ```jsonlite``` from ```RJSONIO``` +* Added handling for long and short dates +* Added unit test for reading private datasets + +### 1.5.1 +Deprecated ```httr::guess_media()``` and implemented ```mime::guess_type()``` + +### 1.6.0 Several changes: + +* New function, ```ls.socrata``` to list all datasets on a Socrata portal. +* New optional argument, ```app_token```, which lets users supply an API token while using ```read.socrata()``` to minimize throttling. +* Repairs a bug where ```read.socrata``` failed when reading in a date with a column, but there are null values in that column. +* Minor changes to the DESCRIPTION documentation to point users to GitHub for issues and provides new contact information. + +### 1.6.1 Bug fixes: + +* Resolved potential [name collision issue](https://github.com/Chicago/RSocrata/issues/42) +* Cleaned-up documentation with contributor instructions ([#23](https://github.com/Chicago/RSocrata/issues/23) and [#28](https://github.com/Chicago/RSocrata/issues/28)) +* Moved test coverage in `RUnit` to `testthat` and implemented code coverage monitoring ([#41](https://github.com/Chicago/RSocrata/issues/41)) +* Clean-up DESCRIPTION ([#40](https://github.com/Chicago/RSocrata/issues/40)) +* Add continuous integration for Windows ([#39](https://github.com/Chicago/RSocrata/issues/39)) +* Migrate Travis-CI to "proper" R YAML ([#46](https://github.com/Chicago/RSocrata/issues/46)) + + + + + + + diff --git a/R/RSocrata.R b/R/RSocrata.R index 9a30d23..2d77146 100644 --- a/R/RSocrata.R +++ b/R/RSocrata.R @@ -3,16 +3,17 @@ # Author: Hugh J. Devlin, Ph. D. 
2013-08-28 ############################################################################### -library('httr') # for access to the HTTP header -library('jsonlite') # for parsing data types from Socrata -library('mime') # for guessing mime type +# library('httr') # for access to the HTTP header +# library('jsonlite') # for parsing data types from Socrata +# library('mime') # for guessing mime type #' Time-stamped message #' #' Issue a time-stamped, origin-stamped log message. -#' @param s a string +#' @param s - a string #' @return None (invisible NULL) as per cat #' @author Hugh J. Devlin \email{Hugh.Devlin@@cityofchicago.org} +#' @noRd logMsg <- function(s) { cat(format(Sys.time(), "%Y-%m-%d %H:%M:%OS3 "), as.character(sys.call(-1))[1], ": ", s, '\n', sep='') } @@ -23,9 +24,10 @@ logMsg <- function(s) { #' supported by Socrata. It will provide an exception if the syntax #' does not align to Socrata unique identifiers. It only checks for #' the validity of the syntax, but does not check if it actually exists. -#' @param fourByFour a string; character vector of length one +#' @param fourByFour - a string; character vector of length one #' @return TRUE if is valid Socrata unique identifier, FALSE otherwise #' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org} +#' @export isFourByFour <- function(fourByFour) { fourByFour <- as.character(fourByFour) if(nchar(fourByFour) != 9) @@ -43,14 +45,16 @@ isFourByFour <- function(fourByFour) { #' URL. Will accept queries with optional API token as a separate #' argument or will also accept API token in the URL query. Will #' resolve conflicting API token by deferring to original URL. 
-#' @param url a string; character vector of length one -#' @param app_token a string; SODA API token used to query the data +#' @param url - a string; character vector of length one +#' @param app_token - a string; SODA API token used to query the data #' portal \url{http://dev.socrata.com/consumers/getting-started.html} -#' @return a valid Url +#' @return a - valid Url +#' @importFrom httr parse_url build_url #' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org} +#' @export validateUrl <- function(url, app_token) { url <- as.character(url) - parsedUrl <- parse_url(url) + parsedUrl <- httr::parse_url(url) if(is.null(parsedUrl$scheme) | is.null(parsedUrl$hostname) | is.null(parsedUrl$path)) stop(url, " does not appear to be a valid URL.") if(!is.null(app_token)) { # Handles the addition of API token and resolves invalid uses @@ -67,14 +71,14 @@ validateUrl <- function(url, app_token) { }) } if(substr(parsedUrl$path, 1, 9) == 'resource/') { - return(build_url(parsedUrl)) # resource url already + return(httr::build_url(parsedUrl)) # resource url already } fourByFour <- basename(parsedUrl$path) if(!isFourByFour(fourByFour)) stop(fourByFour, " is not a valid Socrata dataset unique identifier.") else { parsedUrl$path <- paste('resource/', fourByFour, '.csv', sep="") - build_url(parsedUrl) + httr::build_url(parsedUrl) } } @@ -84,7 +88,7 @@ validateUrl <- function(url, app_token) { #' as it might appear in the first row of data, #' to field name as it might appear in the HTTP header; #' that is, lower case, periods replaced with underscores#' -#' @param humanName a Socrata human-readable column name +#' @param humanName - a Socrata human-readable column name #' @return Socrata field name #' @export #' @author Hugh J. Devlin, Ph. D. 
\email{Hugh.Devlin@@cityofchicago.org} @@ -96,7 +100,7 @@ fieldName <- function(humanName) { #' Convert Socrata calendar_date string to POSIX #' -#' @param x character vector in one of two Socrata calendar_date formats +#' @param x - character vector in one of two Socrata calendar_date formats #' @return a POSIX date #' @export #' @author Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@@cityofchicago.org} @@ -110,35 +114,39 @@ posixify <- function(x) { strptime(x, format="%m/%d/%Y %I:%M:%S %p") # long date-time format } -# Wrap httr GET in some diagnostics -# -# In case of failure, report error details from Socrata -# -# @param url Socrata Open Data Application Program Interface (SODA) query -# @return httr response object -# @author Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@@cityofchicago.org} +#' Wrap httr GET in some diagnostics +#' +#' In case of failure, report error details from Socrata +#' +#' @param url - Socrata Open Data Application Program Interface (SODA) query +#' @return httr response object +#' @importFrom httr http_status GET content stop_for_status +#' @author Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@@cityofchicago.org} +#' @noRd getResponse <- function(url) { - response <- GET(url) - status <- http_status(response) + response <- httr::GET(url) + # status <- httr::http_status(response) if(response$status_code != 200) { msg <- paste("Error in httr GET:", response$status_code, response$headers$statusmessage, url) if(!is.null(response$headers$`content-length`) && (response$headers$`content-length` > 0)) { - details <- content(response) + details <- httr::content(response) msg <- paste(msg, details$code[1], details$message[1]) } logMsg(msg) } - stop_for_status(response) - response + httr::stop_for_status(response) + return(response) } -# Content parsers -# -# Return a data frame for csv -# -# @author Hugh J. 
Devlin \email{Hugh.Devlin@@cityofchicago.org} -# @param an httr response object -# @return data frame, possibly empty +#' Content parsers +#' +#' Return a data frame for csv +#' +#' @author Hugh J. Devlin \email{Hugh.Devlin@@cityofchicago.org} +#' @importFrom httr content +#' @param response - an httr response object +#' @return data frame, possibly empty +#' @noRd getContentAsDataFrame <- function(response) { UseMethod('response') } getContentAsDataFrame <- function(response) { mimeType <- response$header$'content-type' @@ -156,40 +164,44 @@ getContentAsDataFrame <- function(response) { ) # end switch } -# Get the SoDA 2 data types -# -# Get the Socrata Open Data Application Program Interface data types from the http response header -# @author Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@@cityofchicago.org} -# @param responseHeaders headers attribute from an httr response object -# @return a named vector mapping field names to data types +#' Get the SoDA 2 data types +#' +#' Get the Socrata Open Data Application Program Interface data types from the http response header +#' @author Hugh J. Devlin, Ph. D. 
\email{Hugh.Devlin@@cityofchicago.org} +#' @param response - headers attribute from an httr response object +#' @return a named vector mapping field names to data types +#' @importFrom jsonlite fromJSON +#' @noRd getSodaTypes <- function(response) { UseMethod('response') } getSodaTypes <- function(response) { - result <- fromJSON(response$headers[['x-soda2-types']]) - names(result) <- fromJSON(response$headers[['x-soda2-fields']]) - result + result <- jsonlite::fromJSON(response$headers[['x-soda2-types']]) + names(result) <- jsonlite::fromJSON(response$headers[['x-soda2-fields']]) + return(result) } #' Get a full Socrata data set as an R data frame #' #' Manages throttling and POSIX date-time conversions #' -#' @param url A Socrata resource URL, +#' @param url - A Socrata resource URL, #' or a Socrata "human-friendly" URL, #' or Socrata Open Data Application Program Interface (SODA) query #' requesting a comma-separated download format (.csv suffix), #' May include SoQL parameters, #' but is assumed to not include a SODA offset parameter -#' @param app_token a string; SODA API token used to query the data +#' @param app_token - a string; SODA API token used to query the data #' portal \url{http://dev.socrata.com/consumers/getting-started.html} #' @return an R data frame with POSIX dates -#' @export #' @author Hugh J. Devlin, Ph. D. 
\email{Hugh.Devlin@@cityofchicago.org} #' @examples #' df <- read.socrata("http://soda.demo.socrata.com/resource/4334-bgaj.csv") +#' @importFrom httr parse_url build_url +#' @importFrom mime guess_type +#' @export read.socrata <- function(url, app_token = NULL) { validUrl <- validateUrl(url, app_token) # check url syntax, allow human-readable Socrata url - parsedUrl <- parse_url(validUrl) - mimeType <- guess_type(parsedUrl$path) + parsedUrl <- httr::parse_url(validUrl) + mimeType <- mime::guess_type(parsedUrl$path) if(!(mimeType %in% c('text/csv','application/json'))) stop("Error in read.socrata: ", mimeType, " not a supported data format.") response <- getResponse(validUrl) @@ -206,28 +218,30 @@ read.socrata <- function(url, app_token = NULL) { for(columnName in colnames(page)[!is.na(dataTypes[fieldName(colnames(page))]) & dataTypes[fieldName(colnames(page))] == 'calendar_date']) { result[[columnName]] <- posixify(result[[columnName]]) } - result + return(result) } #' List datasets available from a Socrata domain #' -#' @param url A Socrata URL. This simply points to the site root. +#' @param url - A Socrata URL. This simply points to the site root. #' @return an R data frame containing a listing of datasets along with #' various metadata. 
-#' @export #' @author Peter Schmiedeskamp \email{pschmied@@uw.edu} #' @examples #' df <- ls.socrata("http://soda.demo.socrata.com") +#' @importFrom jsonlite fromJSON +#' @importFrom httr parse_url +#' @export ls.socrata <- function(url) { url <- as.character(url) - parsedUrl <- parse_url(url) + parsedUrl <- httr::parse_url(url) if(is.null(parsedUrl$scheme) | is.null(parsedUrl$hostname)) stop(url, " does not appear to be a valid URL.") parsedUrl$path <- "data.json" - df <- fromJSON(build_url(parsedUrl)) + df <- jsonlite::fromJSON(httr::build_url(parsedUrl)) df <- as.data.frame(df$dataset) df$issued <- as.POSIXct(df$issued) df$modified <- as.POSIXct(df$modified) df$theme <- as.character(df$theme) - df -} \ No newline at end of file + return(df) +} diff --git a/R/tests/testRSocrata.R b/R/tests/testRSocrata.R deleted file mode 100644 index 85aa7b3..0000000 --- a/R/tests/testRSocrata.R +++ /dev/null @@ -1,196 +0,0 @@ -# RUnit tests -# -# resource 4334-bgaj on the Socrata demo site is USGS Earthquakes for 2012-11-01 API School Demo -# -# Author: Hugh 2013-07-15 -############################################################################### - -library('RUnit') - -source("R/RSocrata.R") - -test.posixifyLong <- function() { - dt <- posixify("09/14/2012 10:38:01 PM") - checkEquals("POSIXlt", class(dt)[1], "first data type of a date") - checkEquals(2012, dt$year + 1900, "year") - checkEquals(9, dt$mon + 1, "month") - checkEquals(14, dt$mday, "day") - checkEquals(22, dt$hour, "hours") - checkEquals(38, dt$min, "minutes") - checkEquals(1, dt$sec, "seconds") -} - -test.posixifyShort <- function() { - dt <- posixify("09/14/2012") - checkEquals("POSIXlt", class(dt)[1], "first data type of a date") - checkEquals(2012, dt$year + 1900, "year") - checkEquals(9, dt$mon + 1, "month") - checkEquals(14, dt$mday, "day") - checkEquals(0, dt$hour, "hours") - checkEquals(0, dt$min, "minutes") - checkEquals(0, dt$sec, "seconds") -} - -test.readSocrataCsv <- function() { - df <- 
read.socrata('https://soda.demo.socrata.com/resource/4334-bgaj.csv') - checkEquals(1007, nrow(df), "rows") - checkEquals(9, ncol(df), "columns") -} - -test.readSocrataJson <- function() { - df <- read.socrata('https://soda.demo.socrata.com/resource/4334-bgaj.json') - checkEquals(1007, nrow(df), "rows") - checkEquals(11, ncol(df), "columns") -} - -test.readSocrataNoScheme <- function() { - checkException(read.socrata('soda.demo.socrata.com/resource/4334-bgaj.csv')) -} - -test.readSoQL <- function() { - df <- read.socrata('http://soda.demo.socrata.com/resource/4334-bgaj.csv?$select=region') - checkEquals(1007, nrow(df), "rows") - checkEquals(1, ncol(df), "columns") -} - -test.readSoQLColumnNotFound <- function() { - # SoQL API uses field names, not human names - checkException(read.socrata('http://soda.demo.socrata.com/resource/4334-bgaj.csv?$select=Region')) -} - -test.readPrivate <- function() { - checkException(read.socrata('http://data.cityofchicago.org/resource/j8vp-2qpg.json')) -} - -test.readSocrataHumanReadable <- function() { - df <- read.socrata('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj') - checkEquals(1007, nrow(df), "rows") - checkEquals(9, ncol(df), "columns") -} - -test.readSocrataFormatNotSupported <- function() { - # Unsupported data formats - checkException(read.socrata('http://soda.demo.socrata.com/resource/4334-bgaj.xml')) -} - -test.readSocrataCalendarDateLong <- function() { - df <- read.socrata('http://soda.demo.socrata.com/resource/4334-bgaj.csv') - dt <- df$Datetime[1] # "2012-09-14 22:38:01" - checkEquals("POSIXlt", class(dt)[1], "data type of a date") - checkEquals(2012, dt$year + 1900, "year") - checkEquals(9, dt$mon + 1, "month") - checkEquals(14, dt$mday, "day") - checkEquals(22, dt$hour, "hours") - checkEquals(38, dt$min, "minutes") - checkEquals(1, dt$sec, "seconds") -} - -test.readSocrataCalendarDateShort <- function() { - df <- 
read.socrata('http://data.cityofchicago.org/resource/y93d-d9e3.csv?$order=debarment_date') - dt <- df$DEBARMENT.DATE[1] # "05/21/1981" - checkEquals("POSIXlt", class(dt)[1], "data type of a date") - checkEquals(81, dt$year, "year") - checkEquals(5, dt$mon + 1, "month") - checkEquals(21, dt$mday, "day") - checkEquals(0, dt$hour, "hours") - checkEquals(0, dt$min, "minutes") - checkEquals(0, dt$sec, "seconds") -} - -test.isFourByFour <- function() { - checkTrue(isFourByFour("4334-bgaj"), "ok") - checkTrue(!isFourByFour("4334c-bgajc"), "11 characters instead of 9") - checkTrue(!isFourByFour("433-bga"), "7 characters instead of 9") - checkTrue(!isFourByFour("433-bgaj"), "3 characters before dash instead of 4") - checkTrue(!isFourByFour("4334-!gaj"), "non-alphanumeric character") -} - -test.isFourByFourUrl <- function() { - checkException(read.socrata("https://soda.demo.socrata.com/api/views/4334c-bgajc"), "11 characters instead of 9") - checkException(read.socrata("https://soda.demo.socrata.com/api/views/433-bga"), "7 characters instead of 9") - checkException(read.socrata("https://soda.demo.socrata.com/api/views/433-bgaj"), "3 characters before dash instead of 4") - checkException(read.socrata("https://soda.demo.socrata.com/api/views/4334-!gaj"), "non-alphanumeric character") -} - -test.readSocrataInvalidUrl <- function() { - checkException(read.socrata("a.fake.url.being.tested"), "invalid url") -} - -test.readSocrataToken <- function(){ - df <- read.socrata('https://soda.demo.socrata.com/resource/4334-bgaj.csv', app_token="ew2rEMuESuzWPqMkyPfOSGJgE") - checkEquals(1007, nrow(df), "rows") - checkEquals(9, ncol(df), "columns") -} - -test.readSocrataHumanReadableToken <- function(){ - df <- read.socrata('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj', app_token="ew2rEMuESuzWPqMkyPfOSGJgE") - checkEquals(1007, nrow(df), "rows") - checkEquals(9, ncol(df), "columns") -} - -test.readAPIConflict <- function(){ - df <- 
read.socrata('https://soda.demo.socrata.com/resource/4334-bgaj.csv?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE', app_token="ew2rEMuESuzWPqMkyPfOSUSER") - checkEquals(1007, nrow(df), "rows") - checkEquals(9, ncol(df), "columns") - # Check that function is calling the API token specified in url - checkTrue(substr(validateUrl('https://soda.demo.socrata.com/resource/4334-bgaj.csv?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE', app_token="ew2rEMuESuzWPqMkyPfOSUSER"), 70, 94)=="ew2rEMuESuzWPqMkyPfOSGJgE") -} - -test.readAPIConflictHumanReadable <- function(){ - df <- read.socrata('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE', app_token="ew2rEMuESuzWPqMkyPfOSUSER") - checkEquals(1007, nrow(df), "rows") - checkEquals(9, ncol(df), "columns") - # Check that function is calling the API token specified in url - checkTrue(substr(validateUrl('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE', app_token="ew2rEMuESuzWPqMkyPfOSUSER"), 70, 94)=="ew2rEMuESuzWPqMkyPfOSGJgE") -} - -test.incorrectAPIQuery <- function(){ - # The query below is missing a $ before app_token. - checkException(read.socrata("https://soda.demo.socrata.com/resource/4334-bgaj.csv?$app_token=ew2rEMuESuzWPqMkyPfOSGJgE")) - # Check that it was only because of missing $ - df <- read.socrata("https://soda.demo.socrata.com/resource/4334-bgaj.csv?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE") - checkEquals(1007, nrow(df), "rows") - checkEquals(9, ncol(df), "columns") -} - -test.incorrectAPIQueryHumanReadable <- function(){ - # The query below is missing a $ before app_token. 
- checkException(read.socrata("https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj?$app_token=ew2rEMuESuzWPqMkyPfOSGJgE")) - # Check that it was only because of missing $ - df <- read.socrata("https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE") - checkEquals(1007, nrow(df), "rows") - checkEquals(9, ncol(df), "columns") -} - -test.lsSocrata <- function() { - # Makes some potentially erroneous assumptions about availability - # of soda.demo.socrata.com - df <- ls.socrata("https://soda.demo.socrata.com") - checkEquals(TRUE, nrow(df) > 0) - # Test comparing columns against data.json specifications: - # https://project-open-data.cio.gov/v1.1/schema/ - core_names <- as.character(c("issued", "modified", "keyword", "landingPage", "theme", - "title", "accessLevel", "distribution", "description", - "identifier", "publisher", "contactPoint", "license")) - checkEquals(as.logical(rep(TRUE, length(core_names))), core_names %in% names(df)) - # Check that all names in data.json are accounted for in ls.socrata return - checkEquals(as.logical(rep(TRUE, length(names(df)))), names(df) %in% c(core_names)) -} - -test.lsSocrataInvalidURL <- function() { - checkException(read.socrata("a.fake.url.being.tested"), "invalid url") -} - -test.suite <- defineTestSuite("test Socrata SODA interface", - dirs = file.path("R/tests"), - testFileRegexp = '^test.*\\.R') - -runAllTests <- function() { # Run during development, will complete regardless of errors - test.result <- runTestSuite(test.suite) - printTextProtocol(test.result) -} - -runAllTestsCI <- function() { # Ran for continuous integration tests, will stop if error found - test.result <- runTestSuite(test.suite) - if(getErrors(test.result)$nErr > 0 | getErrors(test.result)$nFail > 0) stop("TEST HAD ERRORS!") -} \ No newline at end of file diff --git a/README.md b/README.md index 98d3451..8c7f4c8 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,13 @@ RSocrata 
======== -Master: [![Build Status - Master](https://api.travis-ci.org/Chicago/RSocrata.png?branch=master)](https://travis-ci.org/Chicago/RSocrata) +**Master** -Dev: [![Build Status - Dev](https://api.travis-ci.org/Chicago/RSocrata.png?branch=dev)](https://travis-ci.org/Chicago/RSocrata) +[![Linux build - Master](https://img.shields.io/travis/Chicago/RSocrata/master.svg?style=flat-square&label=Linux build)](https://travis-ci.org/Chicago/RSocrata)[![Windows build - Master](https://img.shields.io/appveyor/ci/tomschenkjr/RSocrata/master.svg?style=flat-square&label=Windows build)](https://ci.appveyor.com/project/tomschenkjr/rsocrata/branch/master)[![Coverage - Master](https://img.shields.io/coveralls/Chicago/RSocrata/master.svg?style=flat-square&label=Coverage - Master)](https://coveralls.io/r/Chicago/RSocrata?branch=master) + +**Dev** + +[![Linux build - Dev](https://img.shields.io/travis/Chicago/RSocrata/dev.svg?style=flat-square&label=Linux build)](https://travis-ci.org/Chicago/RSocrata)[![Windows build - Dev](https://img.shields.io/appveyor/ci/tomschenkjr/RSocrata/dev.svg?style=flat-square&label=Windows build)](https://ci.appveyor.com/project/tomschenkjr/rsocrata/branch/dev)[![Coverage - Dev](https://img.shields.io/coveralls/Chicago/RSocrata/dev.svg?style=flat-square&label=Coverage status - Dev)](https://coveralls.io/r/Chicago/RSocrata?branch=dev) A tool for downloading Socrata datasets as R data frames -------------------------------------------------------- @@ -19,19 +23,19 @@ Supports [SoDA query parameters](http://dev.socrata.com/docs/queries.html) in th Use ```ls.socrata()``` to list all datasets available on a Socrata webserver. -[RUnit](http://cran.r-project.org/web/packages/RUnit/index.html) test coverage. +[testthat](http://cran.r-project.org/web/packages/testthat/index.html) test coverage. ### Example: Reading SoDA valid URLs ```r -earthquakesDataFrame <- read.socrata("http://soda.demo.socrata.com/resource/4334-bgaj.csv")
-nrow(earthquakesDataFrame) # 1007 (two "pages")
+earthquakesDataFrame <- read.socrata("http://soda.demo.socrata.com/resource/4334-bgaj.csv") +nrow(earthquakesDataFrame) # 1007 (two "pages") class(earthquakesDataFrame$Datetime[1]) # POSIXlt ``` ### Example: Reading "human-readable" URLs ```r -earthquakesDataFrame <- read.socrata("https://soda.demo.socrata.com/dataset/USGS-Earthquakes-for-2012-11-01-API-School-Demo/4334-bgaj")
-nrow(earthquakesDataFrame) # 1007 (two "pages")
+earthquakesDataFrame <- read.socrata("https://soda.demo.socrata.com/dataset/USGS-Earthquakes-for-2012-11-01-API-School-Demo/4334-bgaj") +nrow(earthquakesDataFrame) # 1007 (two "pages") class(earthquakesDataFrame$Datetime[1]) # POSIXlt ``` @@ -53,24 +57,6 @@ allSitesDataFrame$title # Names of each dataset Please report issues, request enhancements or fork us at the [City of Chicago github](https://github.com/Chicago/RSocrata/issues). -### Change log - -1.1 Add check for valid Socrata resource URL. Add check for supported download file format. Add support for Socrata short dates. - -1.2 Use comma-separated file format for Socrata downloads. - -1.3 Added support for human-readable URL. - -1.4 Add json file format for Socrata downloads. Switch to RJSONIO rom rjson. - -1.5 Several changes: -* Swapped ```jsonlite``` to ```RJSONIO``` -* Added handling for long and short dates -* Added unit test for reading private datasets - -1.5.1 Deprecated ```httr::guess_media()``` and implemented ```httr::guess_type()``` +### Contributing -1.6.0 Several changes: -* New function, ```ls.socrata``` to list all datasets on a Socrata portal. -* New optional argument, ```app_token```, which lets users supply an API token while using ```read.socrata() to minimize throttling. -* Repairs a bug where ```read.socrata``` failed when reading in a date with a column, but there are null values in that column. +If you would like to contribute to this project, please see the [contributing documentation](CONTRIBUTING.md) and the [product roadmap](https://github.com/Chicago/RSocrata/wiki/Roadmap#planned-releases). 
diff --git a/RSocrata.Rproj b/RSocrata.Rproj index b785b09..584b854 100644 --- a/RSocrata.Rproj +++ b/RSocrata.Rproj @@ -13,6 +13,7 @@ RnwWeave: Sweave LaTeX: pdfLaTeX BuildType: Package +PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source PackageCheckArgs: --as-cran -PackageRoxygenize: rd +PackageRoxygenize: rd,collate,namespace diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..acf5dcc --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,39 @@ +environment: + WARNINGS_ARE_ERRORS: 1 + +init: + ps: | + $ErrorActionPreference = "Stop" + Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" + Import-Module '..\appveyor-tool.ps1' +install: + ps: Bootstrap + +build_script: + - travis-tool.sh install_deps + +test_script: + - Rscript -e "install.packages('roxygen2', repos='http://cran.us.r-project.org'); library(roxygen2); roxygen2::roxygenize(package.dir='.', roclets=c('rd', 'collate', 'namespace'))" + - travis-tool.sh run_tests + +on_failure: + - travis-tool.sh dump_logs + +artifacts: + - path: '*.Rcheck\**\*.log' + name: Logs + + - path: '*.Rcheck\**\*.out' + name: Logs + + - path: '*.Rcheck\**\*.fail' + name: Logs + + - path: '*.Rcheck\**\*.Rout' + name: Logs + + - path: '\*_*.tar.gz' + name: Bits + + - path: '\*_*.zip' + name: Bits \ No newline at end of file diff --git a/man/fieldName.Rd b/man/fieldName.Rd new file mode 100644 index 0000000..842e19e --- /dev/null +++ b/man/fieldName.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RSocrata.R +\name{fieldName} +\alias{fieldName} +\title{Convert Socrata human-readable column name to field name} +\usage{ +fieldName(humanName) +} +\arguments{ +\item{humanName}{- a Socrata human-readable column name} +} +\value{ +Socrata field name +} +\description{ +Convert Socrata human-readable column name, +as it might appear in the first row of data, +to field 
name as it might appear in the HTTP header; +that is, lower case, periods replaced with underscores +} +\examples{ +fieldName("Number.of.Stations") # number_of_stations +} +\author{ +Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@cityofchicago.org} +} + diff --git a/man/isFourByFour.Rd b/man/isFourByFour.Rd new file mode 100644 index 0000000..1053d42 --- /dev/null +++ b/man/isFourByFour.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RSocrata.R +\name{isFourByFour} +\alias{isFourByFour} +\title{Checks the validity of the syntax for a potential Socrata dataset Unique Identifier, also known as a 4x4.} +\usage{ +isFourByFour(fourByFour) +} +\arguments{ +\item{fourByFour}{- a string; character vector of length one} +} +\value{ +TRUE if it is a valid Socrata unique identifier, FALSE otherwise +} +\description{ +Will check the validity of a potential dataset unique identifier +supported by Socrata. It will provide an exception if the syntax +does not align to Socrata unique identifiers. It only checks for +the validity of the syntax, but does not check if it actually exists. +} +\author{ +Tom Schenk Jr \email{tom.schenk@cityofchicago.org} +} + diff --git a/man/ls.socrata.Rd b/man/ls.socrata.Rd new file mode 100644 index 0000000..3fe5934 --- /dev/null +++ b/man/ls.socrata.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RSocrata.R +\name{ls.socrata} +\alias{ls.socrata} +\title{List datasets available from a Socrata domain} +\usage{ +ls.socrata(url) +} +\arguments{ +\item{url}{- A Socrata URL. This simply points to the site root.} +} +\value{ +an R data frame containing a listing of datasets along with +various metadata.
+} +\description{ +List datasets available from a Socrata domain +} +\examples{ +df <- ls.socrata("http://soda.demo.socrata.com") +} +\author{ +Peter Schmiedeskamp \email{pschmied@uw.edu} +} + diff --git a/man/posixify.Rd b/man/posixify.Rd new file mode 100644 index 0000000..269daba --- /dev/null +++ b/man/posixify.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RSocrata.R +\name{posixify} +\alias{posixify} +\title{Convert Socrata calendar_date string to POSIX} +\usage{ +posixify(x) +} +\arguments{ +\item{x}{- character vector in one of two Socrata calendar_date formats} +} +\value{ +a POSIX date +} +\description{ +Convert Socrata calendar_date string to POSIX +} +\author{ +Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@cityofchicago.org} +} + diff --git a/man/read.socrata.Rd b/man/read.socrata.Rd new file mode 100644 index 0000000..61c3e73 --- /dev/null +++ b/man/read.socrata.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RSocrata.R +\name{read.socrata} +\alias{read.socrata} +\title{Get a full Socrata data set as an R data frame} +\usage{ +read.socrata(url, app_token = NULL) +} +\arguments{ +\item{url}{- A Socrata resource URL, +or a Socrata "human-friendly" URL, +or Socrata Open Data Application Program Interface (SODA) query +requesting a comma-separated download format (.csv suffix). +May include SoQL parameters, +but is assumed not to include a SODA offset parameter} + +\item{app_token}{- a string; SODA API token used to query the data +portal \url{http://dev.socrata.com/consumers/getting-started.html}} +} +\value{ +an R data frame with POSIX dates +} +\description{ +Manages throttling and POSIX date-time conversions +} +\examples{ +df <- read.socrata("http://soda.demo.socrata.com/resource/4334-bgaj.csv") +} +\author{ +Hugh J. Devlin, Ph. D.
\email{Hugh.Devlin@cityofchicago.org} +} + diff --git a/man/validateUrl.Rd b/man/validateUrl.Rd new file mode 100644 index 0000000..ef9a17b --- /dev/null +++ b/man/validateUrl.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RSocrata.R +\name{validateUrl} +\alias{validateUrl} +\title{Convert, if necessary, URL to valid REST API URL supported by Socrata.} +\usage{ +validateUrl(url, app_token) +} +\arguments{ +\item{url}{- a string; character vector of length one} + +\item{app_token}{- a string; SODA API token used to query the data +portal \url{http://dev.socrata.com/consumers/getting-started.html}} +} +\value{ +a valid URL +} +\description{ +Will convert a human-readable URL to a valid REST API call +supported by Socrata. It will accept a valid API URL if provided +by users and will also convert a human-readable URL to a valid API +URL. Will accept queries with optional API token as a separate +argument or will also accept API token in the URL query. Will +resolve conflicting API token by deferring to original URL.
+} +\author{ +Tom Schenk Jr \email{tom.schenk@cityofchicago.org} +} + diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..8604f6c --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(RSocrata) + +test_check("RSocrata") diff --git a/tests/testthat/test-all.R b/tests/testthat/test-all.R new file mode 100644 index 0000000..052e6f2 --- /dev/null +++ b/tests/testthat/test-all.R @@ -0,0 +1,196 @@ +library(testthat) +library(RSocrata) +library(httr) +library(jsonlite) +library(mime) + +context("posixify function") + +test_that("posixify returns Long format", { + dt <- posixify("09/14/2012 10:38:01 PM") + expect_equal("POSIXlt", class(dt)[1], label="first data type of a date") + expect_equal(2012, dt$year + 1900, label="year") + expect_equal(9, dt$mon + 1, label="month") + expect_equal(14, dt$mday, label="day") + expect_equal(22, dt$hour, label="hours") + expect_equal(38, dt$min, label="minutes") + expect_equal(1, dt$sec, label="seconds") +}) + + +test_that("posixify returns Short format", { + dt <- posixify("09/14/2012") + expect_equal("POSIXlt", class(dt)[1], label="first data type of a date") + expect_equal(2012, dt$year + 1900, label="year") + expect_equal(9, dt$mon + 1, label="month") + expect_equal(14, dt$mday, label="day") + expect_equal(0, dt$hour, label="hours") + expect_equal(0, dt$min, label="minutes") + expect_equal(0, dt$sec, label="seconds") +}) + +context("read Socrata") + +test_that("read Socrata CSV", { + df <- read.socrata('https://soda.demo.socrata.com/resource/4334-bgaj.csv') + expect_equal(1007, nrow(df), label="rows") + expect_equal(9, ncol(df), label="columns") +}) + +test_that("read Socrata JSON", { + df <- read.socrata('https://soda.demo.socrata.com/resource/4334-bgaj.json') + expect_equal(1007, nrow(df), label="rows") + expect_equal(11, ncol(df), label="columns") +}) + +test_that("read Socrata No Scheme", { + expect_error(read.socrata('soda.demo.socrata.com/resource/4334-bgaj.csv')) +}) 
+ +test_that("readSoQL", { + df <- read.socrata('http://soda.demo.socrata.com/resource/4334-bgaj.csv?$select=region') + expect_equal(1007, nrow(df), label="rows") + expect_equal(1, ncol(df), label="columns") +}) + +test_that("readSoQLColumnNotFound (will fail)", { + # SoQL API uses field names, not human names + expect_error(read.socrata('http://soda.demo.socrata.com/resource/4334-bgaj.csv?$select=Region')) +}) + +test_that("URL is private (Unauthorized) (will fail)", { + expect_error(read.socrata('http://data.cityofchicago.org/resource/j8vp-2qpg.json')) +}) + +test_that("readSocrataHumanReadable", { + df <- read.socrata('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj') + expect_equal(1007, nrow(df), label="rows") + expect_equal(9, ncol(df), label="columns") +}) + +test_that("format is not supported", { + # Unsupported data formats + expect_error(read.socrata('http://soda.demo.socrata.com/resource/4334-bgaj.xml')) +}) + +context("Socrata Calendar") + +test_that("Calendar Date Long", { + df <- read.socrata('http://soda.demo.socrata.com/resource/4334-bgaj.csv') + dt <- df$Datetime[1] # "2012-09-14 22:38:01" + expect_equal("POSIXlt", class(dt)[1], label="data type of a date") + expect_equal(2012, dt$year + 1900, label="year") + expect_equal(9, dt$mon + 1, label="month") + expect_equal(14, dt$mday, label="day") + expect_equal(22, dt$hour, label="hours") + expect_equal(38, dt$min, label="minutes") + expect_equal(1, dt$sec, label="seconds") +}) + +test_that("Calendar Date Short", { + df <- read.socrata('http://data.cityofchicago.org/resource/y93d-d9e3.csv?$order=debarment_date') + dt <- df$DEBARMENT.DATE[1] # "05/21/1981" + expect_equal("POSIXlt", class(dt)[1], label="data type of a date") + expect_equal(81, dt$year, label="year") + expect_equal(5, dt$mon + 1, label="month") + expect_equal(21, dt$mday, label="day") + expect_equal(0, dt$hour, label="hours") + expect_equal(0, dt$min, label="minutes") + expect_equal(0, dt$sec, label="seconds") +}) + 
+context("Checks the validity of 4x4") + +test_that("is 4x4", { + expect_true(isFourByFour("4334-bgaj"), label="ok") + expect_false(isFourByFour("4334c-bgajc"), label="11 characters instead of 9") + expect_false(isFourByFour("433-bga"), label="7 characters instead of 9") + expect_false(isFourByFour("433-bgaj"), label="3 characters before dash instead of 4") + expect_false(isFourByFour("4334-!gaj"), label="non-alphanumeric character") +}) + + +test_that("is 4x4 URL", { + expect_error(read.socrata("https://soda.demo.socrata.com/api/views/4334c-bgajc"), "4334c-bgajc is not a valid Socrata dataset unique identifier", label="11 characters instead of 9") + expect_error(read.socrata("https://soda.demo.socrata.com/api/views/433-bga"), "433-bga is not a valid Socrata dataset unique identifier", label="7 characters instead of 9") + expect_error(read.socrata("https://soda.demo.socrata.com/api/views/433-bgaj"), "433-bgaj is not a valid Socrata dataset unique identifier", label="3 characters before dash instead of 4") + expect_error(read.socrata("https://soda.demo.socrata.com/api/views/4334-!gaj"), "4334-!gaj is not a valid Socrata dataset unique identifier", label="non-alphanumeric character") +}) + +test_that("Invalid URL", { + expect_error(read.socrata("a.fake.url.being.tested"), "a.fake.url.being.tested does not appear to be a valid URL", label="invalid url") +}) + +context("Test Socrata with Token") + +test_that("CSV with Token", { + df <- read.socrata('https://soda.demo.socrata.com/resource/4334-bgaj.csv', app_token="ew2rEMuESuzWPqMkyPfOSGJgE") + expect_equal(1007, nrow(df), label="rows") + expect_equal(9, ncol(df), label="columns") +}) + + +test_that("readSocrataHumanReadableToken", { + df <- read.socrata('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj', app_token="ew2rEMuESuzWPqMkyPfOSGJgE") + expect_equal(1007, nrow(df), label="rows") + expect_equal(9, ncol(df), label="columns") +}) + +test_that("API Conflict", { + df <- 
read.socrata('https://soda.demo.socrata.com/resource/4334-bgaj.csv?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE', app_token="ew2rEMuESuzWPqMkyPfOSUSER") + expect_equal(1007, nrow(df), label="rows") + expect_equal(9, ncol(df), label="columns") + # Check that function is calling the API token specified in url + expect_true(substr(validateUrl('https://soda.demo.socrata.com/resource/4334-bgaj.csv?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE', app_token="ew2rEMuESuzWPqMkyPfOSUSER"), 70, 94)=="ew2rEMuESuzWPqMkyPfOSGJgE") +}) + +test_that("readAPIConflictHumanReadable", { + df <- read.socrata('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE', app_token="ew2rEMuESuzWPqMkyPfOSUSER") + expect_equal(1007, nrow(df), label="rows") + expect_equal(9, ncol(df), label="columns") + # Check that function is calling the API token specified in url + expect_true(substr(validateUrl('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE', app_token="ew2rEMuESuzWPqMkyPfOSUSER"), 70, 94)=="ew2rEMuESuzWPqMkyPfOSGJgE") +}) + +test_that("incorrect API Query", { + # The query below is missing a $ before app_token. + expect_error(read.socrata("https://soda.demo.socrata.com/resource/4334-bgaj.csv?$app_token=ew2rEMuESuzWPqMkyPfOSGJgE")) + # Check that it was only because of missing $ + df <- read.socrata("https://soda.demo.socrata.com/resource/4334-bgaj.csv?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE") + expect_equal(1007, nrow(df), label="rows") + expect_equal(9, ncol(df), label="columns") +}) + + +test_that("incorrect API Query Human Readable", { + # The query below is missing a $ before app_token. 
+ expect_error(read.socrata("https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj?$app_token=ew2rEMuESuzWPqMkyPfOSGJgE")) + # Check that it was only because of missing $ + df <- read.socrata("https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj?$$app_token=ew2rEMuESuzWPqMkyPfOSGJgE") + expect_equal(1007, nrow(df), label="rows") + expect_equal(9, ncol(df), label="columns") +}) + +test_that("List datasets available from a Socrata domain", { + # Makes some potentially erroneous assumptions about availability + # of soda.demo.socrata.com + df <- ls.socrata("https://soda.demo.socrata.com") + expect_equal(TRUE, nrow(df) > 0) + # Test comparing columns against data.json specifications: + # https://project-open-data.cio.gov/v1.1/schema/ + core_names <- as.character(c("issued", "modified", "keyword", "landingPage", "theme", + "title", "accessLevel", "distribution", "description", + "identifier", "publisher", "contactPoint", "license")) + expect_equal(as.logical(rep(TRUE, length(core_names))), core_names %in% names(df)) + # Check that all names in data.json are accounted for in ls.socrata return + expect_equal(as.logical(rep(TRUE, length(names(df)))), names(df) %in% c(core_names)) +}) + + + + + + + + +