Merge pull request #2 from WorldFishCenter/kobo-import
Import landings from Kobo on a schedule
efcaguab authored Mar 23, 2021
2 parents 789203b + 44b17d8 commit 28d4f3e
Showing 23 changed files with 871 additions and 67 deletions.
5 changes: 4 additions & 1 deletion .Rbuildignore
@@ -9,5 +9,8 @@
 ^_pkgdown\.yml$
 ^docs$
 ^pkgdown$
-^Dockerfile$
+^Dockerfile*
+^docker-compose.yaml$
+^.dockerignore$
 ^rstudio-prefs.json$
+^auth$
2 changes: 2 additions & 0 deletions .dockerignore
@@ -0,0 +1,2 @@
.github
.git
72 changes: 72 additions & 0 deletions .github/workflows/data-pipeline.yaml
@@ -0,0 +1,72 @@
name: Peskas Timor Data Pipeline

on:
  push:
  schedule:
    - cron: '0 0 * * *'

jobs:

  build-container:
    name: Build R container
    runs-on: ubuntu-20.04
    outputs:
      r-image-name: ${{ steps.build-docker.outputs.FULL_IMAGE_NAME }}
      r-config: ${{ steps.setvars.outputs.r-config }}
    steps:

      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set variables
        id: setvars
        run: |
          if [[ "${{github.base_ref}}" == "main" || "${{github.ref}}" == "refs/heads/main" ]]; then
            echo "::set-output name=r-config::production"
          else
            echo "::set-output name=r-config::default"
          fi
      - name: Get smart tag for docker image
        id: get-tag
        uses: Surgo/docker-smart-tag-action@v1

      # This step is necessary to remove the colon from the beginning of the tag
      - name: Remove colon from smart tag
        id: remove-tag-colon
        env:
          IMAGE_TAG: ${{ steps.get-tag.outputs.tag }}
        run: |
          echo "::set-output name=tag::${IMAGE_TAG:1}"
      - name: Build image with cache
        id: build-docker
        uses: whoan/docker-build-with-cache-action@v5
        with:
          username: $GITHUB_ACTOR
          password: "${{ secrets.GITHUB_TOKEN }}"
          registry: docker.pkg.github.com
          image_name: r-runner-${{ steps.remove-tag-colon.outputs.tag }}
          push_git_tag: true
          dockerfile: Dockerfile.prod

  ingest-landings:
    name: Ingest landings
    needs: build-container
    runs-on: ubuntu-20.04
    container:
      image: ${{needs.build-container.outputs.r-image-name}}
      env:
        R_CONFIG_ACTIVE: ${{ needs.build-container.outputs.r-config }}
        KOBO_TOKEN: ${{ secrets.PESKAS_KOBO_TOKEN }}
        GCP_SA_KEY: ${{ secrets.PESKAS_DATAINGESTION_GCS_KEY }}
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    steps:

      - name: Get session info
        run: Rscript -e 'sessioninfo::session_info()'

      - name: Call ingest_timor_landings()
        run: Rscript -e 'peskas.timor.data.pipeline::ingest_timor_landings()'
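
For local debugging the ingestion job can be reproduced outside of Actions. A minimal sketch, assuming the same environment variables the workflow provides (R_CONFIG_ACTIVE, KOBO_TOKEN, GCP_SA_KEY); the secret file paths below are hypothetical placeholders:

# Sketch: run the ingestion step locally with the environment the
# workflow sets up ("~/secrets/..." paths are placeholders).
Sys.setenv(
  R_CONFIG_ACTIVE = "default",  # the workflow selects "production" on main
  KOBO_TOKEN = readLines("~/secrets/kobo-token.txt")[1],
  GCP_SA_KEY = paste(readLines("~/secrets/gcs-key.json"), collapse = "\n")
)
# config::get() picks up R_CONFIG_ACTIVE automatically, so the same
# configuration is used as in CI
peskas.timor.data.pipeline::ingest_timor_landings()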
32 changes: 23 additions & 9 deletions DESCRIPTION
@@ -1,19 +1,33 @@
 Package: peskas.timor.data.pipeline
-Title: Functions to Implement the Timor Small Scale Fisheries Data Pipeline
+Title: Functions to Implement the Timor Small Scale Fisheries
+    Data Pipeline
 Version: 0.0.0.9000
 Authors@R:
     c(person(given = "Fernando",
-           family = "Cagua",
-           role = c("aut", "cre"),
-           email = "[email protected]",
-           comment = c(ORCID = "0000-0001-5867-3687")),
+             family = "Cagua",
+             role = c("aut", "cre"),
+             email = "[email protected]",
+             comment = c(ORCID = "0000-0001-5867-3687")),
       person(given = "WorldFish",
-            role = c("cph")))
-Description: This package implements the data and modelling pipelines underpinning the Peskas system.
+             role = "cph"))
+Description: This package implements the data and modelling
+    pipelines underpinning the Peskas system.
 License: GPL-3
+Imports:
+    config,
+    git2r,
+    httr,
+    logger,
+    magrittr,
+    purrr
 Suggests:
-    testthat,
-    covr
+    covr,
+    googleCloudStorageR,
+    jsonlite,
+    RCurl,
+    remotes,
+    sessioninfo,
+    testthat
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,7 +1,7 @@
 FROM rocker/geospatial:4.0.3
 
 # Extra R packages
-RUN install2.r targets pkgdown here janitor skimr brms ggdist inspectdf
+RUN install2.r targets pkgdown here janitor skimr brms ggdist inspectdf config jsonlite logger
 
 # Rstudio interface preferences
 COPY rstudio-prefs.json /home/rstudio/.config/rstudio/rstudio-prefs.json
29 changes: 29 additions & 0 deletions Dockerfile.prod
@@ -0,0 +1,29 @@
FROM rocker/r-ver:4.0.3

# Tidyverse system requirements
RUN apt-get update -qq && apt-get -y --no-install-recommends install \
    libxml2-dev \
    libcairo2-dev \
    libgit2-dev \
    default-libmysqlclient-dev \
    libpq-dev \
    libsasl2-dev \
    libsqlite3-dev \
    libssh2-1-dev \
    unixodbc-dev && \
    rm -rf /var/lib/apt/lists/*

# Install imports
RUN install2.r --error --skipinstalled \
    config git2r httr logger magrittr purrr

# Install suggests
RUN install2.r --error --skipinstalled \
    covr googleCloudStorageR jsonlite RCurl remotes sessioninfo testthat

# Install local package
COPY . /home
WORKDIR /home
RUN Rscript -e 'remotes::install_local(dependencies = TRUE)'

ENTRYPOINT ["Rscript"]
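
Since the image installs the package with remotes::install_local() and uses Rscript as its entry point, any R expression passed to docker run is evaluated against the installed package. A smoke-test sketch (not part of this commit):

# Sketch: verify inside the container that install_local() succeeded,
# e.g. docker run <image> -e '<this expression>'
stopifnot("peskas.timor.data.pipeline" %in% rownames(installed.packages()))
sessioninfo::package_info("peskas.timor.data.pipeline")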
10 changes: 9 additions & 1 deletion NAMESPACE
@@ -1,3 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
-export(kobo_host)
+export("%>%")
+export(add_version)
+export(download_survey)
+export(download_survey_data)
+export(download_survey_metadata)
+export(get_host_url)
+export(ingest_timor_landings)
+export(upload_cloud_file)
+importFrom(magrittr,"%>%")
60 changes: 60 additions & 0 deletions R/cloud-storage.R
@@ -0,0 +1,60 @@
#' Upload a local file to a cloud storage bucket
#'
#' @param file a file path (character) to upload. A vector with multiple files
#'   is also supported.
#' @param provider cloud provider to use, either "gcs" or "aws" (currently
#'   only "gcs" is implemented)
#' @param options named list with cloud provider options, see details
#' @param name what to call the file once uploaded. Default is the file path
#'
#' @details
#'
#' ### Google Cloud Services
#'
#' For Google Cloud Services ("gcs") `options` must be a list with two fields:
#' `bucket` with the bucket name (character) you are uploading to, and
#' `service_account_key` with the contents of the authentication JSON file you
#' have downloaded from your Google Project.
#'
#' This function uses [googleCloudStorageR::gcs_auth] and
#' [googleCloudStorageR::gcs_upload] under the hood to upload the file.
#'
#' @return If `provider` is "gcs" and the upload succeeds, a list of metadata
#'   objects
#' @export
#'
#' @examples
#'
#' # Google Cloud Services
#' \dontrun{
#' authentication_details <- readLines("location_of_json_file.json")
#' upload_cloud_file(
#'   file = "table_to_upload.csv",
#'   provider = "gcs",
#'   options = list(service_account_key = authentication_details,
#'                  bucket = "my-bucket"))
#' }
#'
#'
upload_cloud_file <- function(file, provider, options, name = file) {

  out <- list()

  if ("gcs" %in% provider) {

    # Write the key contents to a temporary file so they can be passed
    # to gcs_auth()
    service_account_key <- options$service_account_key
    temp_auth_file <- tempfile(fileext = ".json")
    writeLines(service_account_key, temp_auth_file)
    googleCloudStorageR::gcs_auth(json_file = temp_auth_file)

    # Iterate over multiple files (and names)
    google_output <- purrr::map2(
      file, name,
      ~ googleCloudStorageR::gcs_upload(
        file = .x,
        bucket = options$bucket,
        name = .y,
        predefinedAcl = "bucketLevel"))

    out <- c(out, google_output)
  }

  out
}
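
Because file and name are mapped pairwise with purrr::map2(), several files can be uploaded in one call. A usage sketch (the bucket and object names are hypothetical):

# Sketch: upload two files under versioned object names in a single call
upload_cloud_file(
  file = c("landings.csv", "landings-metadata.json"),
  provider = "gcs",
  name = c("v1/landings.csv", "v1/landings-metadata.json"),
  options = list(
    service_account_key = Sys.getenv("GCP_SA_KEY"),  # key contents, as in CI
    bucket = "my-bucket"  # hypothetical bucket name
  )
)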