Rebuild site after more documentation and cleaning

Note: module names must be spelled exactly. module_xfaostat_L401_Fertilizer() was not recognized by drake when one underscore was missing (it had been written as module_xfaostatL401_Fertilizer).
JGCRI · Aug 8, 2023 · 9ffae91 · 9ffae91
1 parent 39c759a
commit 9ffae91
Show file tree

Hide file tree

Showing 22 changed files with 475 additions and 355 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,10 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
-export(FAOSTAT_check_count_plot)
 export(FAOSTAT_load_raw_data)
 export(FAOSTAT_metadata)
 export(FAO_AREA_RM_NONEXIST)
 export(FF_FILL_NUMERATOR_DENOMINATOR)
+export(FF_check_count_plot)
+export(FF_join_checkmap)
 export(FF_summary)
 export(Moving_average)
 export(approx_fun)
@@ -39,6 +40,8 @@ importFrom(XML,xmlParse)
 importFrom(XML,xmlToDataFrame)
 importFrom(assertthat,assert_that)
 importFrom(data.table,data.table)
+importFrom(dplyr,all_of)
+importFrom(dplyr,any_of)
 importFrom(dplyr,any_vars)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
@@ -59,6 +62,7 @@ importFrom(dplyr,mutate_at)
 importFrom(dplyr,n)
 importFrom(dplyr,pull)
 importFrom(dplyr,rename)
+importFrom(dplyr,rename_at)
 importFrom(dplyr,right_join)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
@@ -78,6 +82,7 @@ importFrom(graphics,title)
 importFrom(magrittr,"%$%")
 importFrom(magrittr,"%>%")
 importFrom(methods,is)
+importFrom(purrr,reduce)
 importFrom(readr,read_csv)
 importFrom(stats,aggregate)
 importFrom(tibble,as_tibble)

diff --git a/R/xfaostat_L100_constants.R b/R/xfaostat_L100_constants.R
@@ -1,6 +1,15 @@
-# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.
 
-# General behavior constants ======================================================================
+
+  # Directories ----
+
+  #DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata")
+
+  DIR_RAW_DATA_FAOSTAT <- "inst/extdata/aglu/FAO/FAOSTAT"
+  OUTPUT_Export_CSV = T
+  # Output GCAM csv
+  DIR_OUTPUT_CSV <- "inst/extdata/aglu/FAO/temp"
+  dir.create(file.path(DIR_OUTPUT_CSV), showWarnings = FALSE)
+
 
 
   # Historical years of focus ----
@@ -12,19 +21,12 @@
   Hist_Year_FBS <- seq(2010, 2019) # New FBS years
   MIN_HIST_PP_YEAR = 2010 # first producer price year
 
-# having issues with package check here
-# comment this line out when building package
-  #DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata")
-  DIR_RAW_DATA_FAOSTAT <- "inst/extdata/aglu/FAO/FAOSTAT"
 
 
-  OUTPUT_Export_CSV = T
-  # Output GCAM csv
-  DIR_OUTPUT_CSV <- "inst/extdata/aglu/FAO/temp"
-  dir.create(file.path(DIR_OUTPUT_CSV), showWarnings = FALSE)
-
 
-  # Balance elements; used in Get_SUA_TEMPLATE and SUA_bal_adjust
+  # Balance elements ----
+  #*******************************************
+  # used in Get_SUA_TEMPLATE and SUA_bal_adjust
 
   c("Opening stocks", "Production", "Import",
     "Export", "Processed", "Food", "Feed", "Seed", "Other uses", "Loss", "Closing stocks",
@@ -44,3 +46,11 @@
   REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY <- 0.15
   Hist_MEAN_Year_NUTRIENT_MASS_CONV <- 2010:2019 # average cal per g
 
+
+
+  # Other utils ----
+  #*******************************************
+  # decimal places in ggplot
+  scaleFUN <- function(x) sprintf("%.0f", x)
+
+  #*******************************************
diff --git a/R/xfaostat_L105_DataConnectionToSUA.R b/R/xfaostat_L105_DataConnectionToSUA.R
@@ -8,7 +8,7 @@
 #' @param ... other optional parameters, depending on command
 #' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
 #'   \code{command} is "MAKE") all the generated outputs
-#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities.
+#' @details This chunk compiles balanced supply utilization data by connecting FAO datasets.
 #' @importFrom assertthat assert_that
 #' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at
 #' first case_when vars transmute
@@ -347,60 +347,9 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
 
 
     ## 2.3. FN: Balance gross trade ----
-    # Fn adjusting gross trade in all regions to be consistent with average (world export and import)
-
-    #' Balance gross trade
-    #' @description Scale gross export and import in all regions to make them equal at the world level.
-    #' @param .DF An input dataframe with an element col including Import and Export
-    #' @param .MIN_TRADE_PROD_RATIO Trade will be removed if world total export or import over production is smaller than .MIN_TRADE_PROD_RATIO (1% default value)
-    #' @param .Reg_VAR Region variable name; default is ("area_code")
-    #' @param .GROUP_VAR Group variable; default is ("item_code", "year")
-    #' @return The same dataframe with balanced world export and import.
-
-    GROSS_TRADE_ADJUST <- function(.DF,
-                                   .MIN_TRADE_PROD_RATIO = 0.01,
-                                   .Reg_VAR = 'area_code',
-                                   .GROUP_VAR = c("item_code", "year")){
-
-      # assert .DF structure
-      assertthat::assert_that(all(c("element", .GROUP_VAR) %in% names(.DF)))
-      assertthat::assert_that(dplyr::is.grouped_df(.DF) == F)
-      assertthat::assert_that(all(c("Import", "Export", "Production") %in%
-                                    c(.DF %>% distinct(element) %>% pull)))
-
-      .DF %>%
-        # Join ExportScaler and ImportScaler
-        left_join(
-          .DF %>%
-            #group_by_at(vars(all_of(.GROUP_VAR), element)) %>%
-            #summarise(value = sum(value, na.rm = T), .groups = "drop") %>%
-            spread(element, value) %>%
-            group_by_at(vars(all_of(.GROUP_VAR))) %>%
-            # filter out items with zero world trade or production
-            # and replace na to zero later for scaler
-            replace_na(list(Export = 0, Import = 0, Production = 0)) %>%
-            filter(sum(Export) != 0, sum(Import) != 0, sum(Production) != 0) %>%
-            # world trade should be later than .MIN_TRADE_PROD_RATIO to have meaningful data
-            # depending on item group, .MIN_TRADE_PROD_RATIO can be set differently
-            filter(sum(Export) / sum(Production) > .MIN_TRADE_PROD_RATIO) %>%
-            filter(sum(Import) / sum(Production) > .MIN_TRADE_PROD_RATIO) %>%
-            # finally,
-            # use average gross trade value to calculate trade scaler
-            # the trade scalers will be applied to all regions
-            mutate(ExportScaler = (sum(Export) + sum(Import))/ 2 / sum(Export),
-                   ImportScaler = (sum(Export) + sum(Import))/ 2 / sum(Import)) %>%
-            select(all_of(c(.Reg_VAR, .GROUP_VAR)), ExportScaler, ImportScaler) %>%
-            ungroup(),
-          by = c(all_of(c(.Reg_VAR, .GROUP_VAR)))) %>%
-        replace_na(list(ExportScaler = 0, ImportScaler = 0)) %>%
-        # If world export, import, or prod is 0, trade will be zero
-        mutate(value = case_when(
-          element %in% c("Export") ~ value * ExportScaler,
-          element %in% c("Import") ~ value * ImportScaler,
-          TRUE ~ value)) %>%
-        select(-ExportScaler, -ImportScaler)
 
-    }
+    #GROSS_TRADE_ADJUST function moved to helper functions
+
 
 
     # 3. Process items in FAO_items to get Balanced SUA data ----

diff --git a/R/xfaostat_L201_Forestry.R b/R/xfaostat_L201_Forestry.R
@@ -60,7 +60,6 @@ module_xfaostat_L201_Forestry <- function(command, ...) {
              unit) %>%
       filter(!is.na(value)) %>%
       rm_accent("item", "area") -> FO1
-    rm(FO)
 
     # 215 unique areas with production data
     FO_area <-

diff --git a/R/xfaostat_L401_Fertilizer.R b/R/xfaostat_L401_Fertilizer.R
@@ -15,7 +15,7 @@
 #' @importFrom tibble tibble
 #' @importFrom tidyr complete drop_na gather nesting spread replace_na fill
 #' @author XZ 2023
-module_xfaostatL401_Fertilizer <- function(command, ...) {
+module_xfaostat_L401_Fertilizer <- function(command, ...) {
 
   MODULE_INPUTS <-
     c(FILE = "aglu/FAO/FAO_an_items_PRODSTAT",
@@ -59,7 +59,6 @@ module_xfaostatL401_Fertilizer <- function(command, ...) {
       complete(nesting(area_code, area), nesting(item_code, item), nesting(element_code, element, unit), year) %>%
       rm_accent("item", "area") -> RFN1
 
-    rm(RFN) # clean
 
     ## RFN1_Production ----
     RFN1 %>%

diff --git a/R/xfaostat_helper_funcs.R b/R/xfaostat_helper_funcs.R
@@ -27,22 +27,6 @@ FAOSTAT_metadata <- function (code = NULL){
   return(metadata)
 }
 
-FAOSTAT_download_bulk <- function(DATASETCODE,
-                                  DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){
-
-  assertthat::assert_that(is.character(DATASETCODE))
-  assertthat::assert_that(is.character(DATA_FOLDER))
-
-
-  lapply(DATASETCODE, function(d){
-    metadata <- FAOSTAT_metadata(code = d)
-    url_bulk = metadata$filelocation
-
-    file_name <- basename(url_bulk)
-    download.file(url_bulk, file.path(DATA_FOLDER, file_name))
-  })
-
-}
 
 
 
@@ -87,34 +71,6 @@ FAOSTAT_load_raw_data <- function(DATASETCODE,
 }
 
 
-#remove accent and apostrophe for cols in a df
-#' rm_accent: Remove accent and prime in selected columns of a data frame
-#'
-#' @param .df Input data frame
-#' @param ... A character set of column names
-#' @importFrom  magrittr %>%
-#' @importFrom  assertthat assert_that
-#' @importFrom  dplyr intersect mutate_at
-#'
-#' @return A data frame with accent and prime removed
-#' @export
-
-rm_accent <- function(.df, ...){
-
-  assertthat::assert_that(
-    length(intersect(c(...), names(.df))) == length(c(...)),
-    msg = "Columns listed not included in the data frame")
-
-  # .df %>%
-  #   mutate_at(c(...), iconv,  to = 'ASCII//TRANSLIT') %>%
-  #   mutate_at(c(...), .funs = gsub, pattern = "\\'", replacement = "")
-
-  .df %>%
-    mutate(dplyr::across(c(...), iconv, to = 'ASCII//TRANSLIT')) %>%
-    mutate(dplyr::across(c(...), gsub, pattern = "\\'", replacement = ""))
-
-}
-
 
 
 
@@ -313,7 +269,16 @@ FF_FILL_NUMERATOR_DENOMINATOR <- function(.DF, NUMERATOR_c, DENOMINATOR_c,
 }
 
 
-# A function to full join data frame to check mappings with common code
+#' FF_join_checkmap: full-join data frames by a common COL_by variable to check mappings
+#'
+#' @param DFs Data frames to be full joined.
+#' @param COL_by By variable in join.
+#' @param COL_rename Other common variables to rename (by adding df names as prefix) before the join.
+#' @importFrom  magrittr %>%
+#' @importFrom dplyr rename_at select any_of all_of full_join
+#' @importFrom purrr reduce
+#' @return A joined data frame
+#' @export
 FF_join_checkmap <- function(DFs, COL_by, COL_rename){
   lapply(DFs, function(df){
 
@@ -323,7 +288,17 @@ FF_join_checkmap <- function(DFs, COL_by, COL_rename){
   }) %>% purrr:: reduce(full_join, by = COL_by)
 }
 
-# Count item_code and area_code by year
+#' FF_check_count_plot: count item_code and area_code by year
+#'
+#' @param .DF Input data frame
+#' @param .ELEMENT A character vector of elements to focus on. If empty, all elements are summarized
+#' @importFrom  dplyr summarize
+#' @importFrom  magrittr %>%
+#' @importFrom  tidyr gather
+#' @importFrom  ggplot2 ggplot aes facet_wrap geom_line theme_bw
+#'
+#' @return A plot showing how the counts of item_code and area_code change over time (grouped by element).
+#' @export
 FF_check_count_plot <- function(.DF, .ELEMENT = c()){
   if (.ELEMENT %>% length() == 0 ) {
     .DF %>% distinct(element) %>% pull -> .ELEMENT
@@ -338,6 +313,34 @@ FF_check_count_plot <- function(.DF, .ELEMENT = c()){
     theme_bw()
 }
 
+#remove accent and apostrophe for cols in a df
+#' rm_accent: Remove accent and prime in selected columns of a data frame
+#'
+#' @param .df Input data frame
+#' @param ... A character set of column names
+#' @importFrom  magrittr %>%
+#' @importFrom  assertthat assert_that
+#' @importFrom  dplyr intersect mutate_at
+#'
+#' @return A data frame with accent and prime removed
+#' @export
+
+rm_accent <- function(.df, ...){
+
+  assertthat::assert_that(
+    length(intersect(c(...), names(.df))) == length(c(...)),
+    msg = "Columns listed not included in the data frame")
+
+  # .df %>%
+  #   mutate_at(c(...), iconv,  to = 'ASCII//TRANSLIT') %>%
+  #   mutate_at(c(...), .funs = gsub, pattern = "\\'", replacement = "")
+
+  .df %>%
+    mutate(dplyr::across(c(...), iconv, to = 'ASCII//TRANSLIT')) %>%
+    mutate(dplyr::across(c(...), gsub, pattern = "\\'", replacement = ""))
+
+}
+
 assert_FBS_balance <- function(.DF){
 
 
@@ -441,33 +444,6 @@ SUA_bal_adjust <- function(.df){
 
 
 
-#' FAOSTAT_check_count_plot: count item_code and area_code by year
-#'
-#' @param .DF Input data frame
-#' @param .ELEMENT A set of elements (in Char) to focus. If empty, all elements are summarized
-#' @importFrom  dplyr summarize
-#' @importFrom  magrittr %>%
-#' @importFrom  tidyr gather
-#' @importFrom  ggplot2 ggplot aes facet_wrap geom_line theme_bw
-#'
-#' @return A plot summarizing the time-series of changing the count of item_code and area_code (grouped by element).
-#' @export
-
-FAOSTAT_check_count_plot <- function(.DF, .ELEMENT = c()){
-  if (.ELEMENT %>% length() == 0 ) {
-    .DF %>% distinct(element) %>% pull -> .ELEMENT
-  }
-  .DF %>% group_by(year, element) %>%
-    summarise(Country = length(unique(area_code)),
-              Item = length(unique(item_code)), .groups = "drop") %>%
-    gather(header, count, -year, -element) %>%
-    filter(element %in% .ELEMENT) %>%
-    ggplot() + facet_wrap(~header, scales = "free") +
-    geom_line(aes(x = year, y = count, color = element)) +
-    theme_bw()
-}
-
-
 
 #' Function saving dataset to csv file with headers
 #'
@@ -520,10 +496,12 @@ output_csv_data <- function(gcam_dataset, col_type_nonyear,
 
 #' Balance gross trade
 #' @description Scale gross export and import in all regions to make them equal at the world level.
+#'
 #' @param .DF An input dataframe with an element col including Import and Export
 #' @param .MIN_TRADE_PROD_RATIO Trade will be removed if world total export or import over production is smaller than .MIN_TRADE_PROD_RATIO (1% default value)
-#' @param .Reg_VAR Region variable name; default is ("area_code")
-#' @param .GROUP_VAR Group variable; default is ("item_code", "year")
+#' @param .Reg_VAR Region variable name; default is area_code
+#' @param .GROUP_VAR Group variable; default is item_code and year
+#'
 #' @return The same dataframe with balanced world export and import.
 
 GROSS_TRADE_ADJUST <- function(.DF,
@@ -572,6 +550,24 @@ GROSS_TRADE_ADJUST <- function(.DF,
 }
 
 
+FAOSTAT_download_bulk <- function(DATASETCODE,
+                                  DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){
+
+  assertthat::assert_that(is.character(DATASETCODE))
+  assertthat::assert_that(is.character(DATA_FOLDER))
+
+
+  lapply(DATASETCODE, function(d){
+    metadata <- FAOSTAT_metadata(code = d)
+    url_bulk = metadata$filelocation
+
+    file_name <- basename(url_bulk)
+    download.file(url_bulk, file.path(DATA_FOLDER, file_name))
+  })
+
+}
+
+
 # decimal places in ggplot
 scaleFUN <- function(x) sprintf("%.0f", x)
 

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.1
 pkgdown: 2.0.7
 pkgdown_sha: ~
 articles: {}
-last_built: 2023-08-08T17:50Z
+last_built: 2023-08-08T19:05Z