Skip to content

Commit

Permalink
Rebuild site after more documentation and cleaning
Browse files Browse the repository at this point in the history
Interestingly, need to be careful with module name: module_xfaostat_L401_Fertilizer(); it was not recognized by drake when one _ was missing.
  • Loading branch information
realxinzhao committed Aug 8, 2023
1 parent 39c759a commit 9ffae91
Show file tree
Hide file tree
Showing 22 changed files with 475 additions and 355 deletions.
7 changes: 6 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# Generated by roxygen2: do not edit by hand

export(FAOSTAT_check_count_plot)
export(FAOSTAT_load_raw_data)
export(FAOSTAT_metadata)
export(FAO_AREA_RM_NONEXIST)
export(FF_FILL_NUMERATOR_DENOMINATOR)
export(FF_check_count_plot)
export(FF_join_checkmap)
export(FF_summary)
export(Moving_average)
export(approx_fun)
Expand Down Expand Up @@ -39,6 +40,8 @@ importFrom(XML,xmlParse)
importFrom(XML,xmlToDataFrame)
importFrom(assertthat,assert_that)
importFrom(data.table,data.table)
importFrom(dplyr,all_of)
importFrom(dplyr,any_of)
importFrom(dplyr,any_vars)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
Expand All @@ -59,6 +62,7 @@ importFrom(dplyr,mutate_at)
importFrom(dplyr,n)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,rename_at)
importFrom(dplyr,right_join)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
Expand All @@ -78,6 +82,7 @@ importFrom(graphics,title)
importFrom(magrittr,"%$%")
importFrom(magrittr,"%>%")
importFrom(methods,is)
importFrom(purrr,reduce)
importFrom(readr,read_csv)
importFrom(stats,aggregate)
importFrom(tibble,as_tibble)
Expand Down
34 changes: 22 additions & 12 deletions R/xfaostat_L100_constants.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.

# General behavior constants ======================================================================

# Directories ----

#DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata")

# Folder holding the raw FAOSTAT files (path is relative to the working directory)
DIR_RAW_DATA_FAOSTAT <- "inst/extdata/aglu/FAO/FAOSTAT"
# Switch for csv export; spelled TRUE because T is an ordinary, reassignable variable
# NOTE(review): presumably read by the csv output helpers -- confirm against callers
OUTPUT_Export_CSV <- TRUE
# Output GCAM csv
DIR_OUTPUT_CSV <- "inst/extdata/aglu/FAO/temp"
# Create the output folder when sourced; non-recursive, and silent if it already
# exists or cannot be created
dir.create(file.path(DIR_OUTPUT_CSV), showWarnings = FALSE)



# Historical years of focus ----
Expand All @@ -12,19 +21,12 @@
Hist_Year_FBS <- seq(2010, 2019) # New FBS years
MIN_HIST_PP_YEAR <- 2010 # first producer price year

# having issues with package check here
# comment this line out when building package
#DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata")
# Folder holding the raw FAOSTAT files (path is relative to the working directory)
DIR_RAW_DATA_FAOSTAT <- "inst/extdata/aglu/FAO/FAOSTAT"


# Switch for csv export; spelled TRUE because T is an ordinary, reassignable variable
# NOTE(review): presumably read by the csv output helpers -- confirm against callers
OUTPUT_Export_CSV <- TRUE
# Output GCAM csv
DIR_OUTPUT_CSV <- "inst/extdata/aglu/FAO/temp"
# Create the output folder when sourced; non-recursive, and silent if it already
# exists or cannot be created
dir.create(file.path(DIR_OUTPUT_CSV), showWarnings = FALSE)


# Balance elements; used in Get_SUA_TEMPLATE and SUA_bal_adjust
# Balance elements ----
#*******************************************
# used in Get_SUA_TEMPLATE and SUA_bal_adjust

c("Opening stocks", "Production", "Import",
"Export", "Processed", "Food", "Feed", "Seed", "Other uses", "Loss", "Closing stocks",
Expand All @@ -44,3 +46,11 @@
REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY <- 0.15
Hist_MEAN_Year_NUTRIENT_MASS_CONV <- 2010:2019 # average cal per g



# Other utils ----
#*******************************************
# Label formatter for ggplot: renders numbers as strings with no decimal places
scaleFUN <- function(x) {
  sprintf(fmt = "%.0f", x)
}

#*******************************************
57 changes: 3 additions & 54 deletions R/xfaostat_L105_DataConnectionToSUA.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#' @param ... other optional parameters, depending on command
#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
#' \code{command} is "MAKE") all the generated outputs
#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities.
#' @details This chunk compiles balanced supply utilization data by connecting FAO datasets.
#' @importFrom assertthat assert_that
#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at
#' first case_when vars transmute
Expand Down Expand Up @@ -347,60 +347,9 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {


## 2.3. FN: Balance gross trade ----
# Fn adjusting gross trade in all regions to be consistent with average (world export and import)

#' Balance gross trade
#' @description Scale gross export and import in all regions to make them equal at the world level.
#' @details World export and import totals are computed per .GROUP_VAR group. Export and Import
#' values are multiplied by scalers that bring both totals to their average. Groups with zero
#' world export, import, or production, or with a world trade-to-production ratio not above
#' .MIN_TRADE_PROD_RATIO, get a scaler of zero, so their trade is set to zero.
#' @param .DF An ungrouped input dataframe in long format with an element col including Import, Export, and Production, and a value col
#' @param .MIN_TRADE_PROD_RATIO Trade will be removed if world total export or import over production is smaller than .MIN_TRADE_PROD_RATIO (1% default value)
#' @param .Reg_VAR Region variable name; default is ("area_code")
#' @param .GROUP_VAR Group variable; default is ("item_code", "year")
#' @return The same dataframe with balanced world export and import.

GROSS_TRADE_ADJUST <- function(.DF,
                               .MIN_TRADE_PROD_RATIO = 0.01,
                               .Reg_VAR = "area_code",
                               .GROUP_VAR = c("item_code", "year")){

  # Join keys as a plain character vector; all_of() is only meant for
  # selection contexts such as select() and vars(), not for join `by`
  JOIN_VAR <- c(.Reg_VAR, .GROUP_VAR)

  # assert .DF structure (every column used below must be present)
  assertthat::assert_that(all(c("element", "value", JOIN_VAR) %in% names(.DF)))
  assertthat::assert_that(!dplyr::is.grouped_df(.DF))
  assertthat::assert_that(all(c("Import", "Export", "Production") %in%
                                c(.DF %>% distinct(element) %>% pull)))

  .DF %>%
    # Join ExportScaler and ImportScaler
    left_join(
      .DF %>%
        spread(element, value) %>%
        group_by_at(vars(all_of(.GROUP_VAR))) %>%
        # filter out items with zero world trade or production
        # and replace na to zero later for scaler
        replace_na(list(Export = 0, Import = 0, Production = 0)) %>%
        filter(sum(Export) != 0, sum(Import) != 0, sum(Production) != 0) %>%
        # world trade should be larger than .MIN_TRADE_PROD_RATIO to have meaningful data
        # depending on item group, .MIN_TRADE_PROD_RATIO can be set differently
        filter(sum(Export) / sum(Production) > .MIN_TRADE_PROD_RATIO) %>%
        filter(sum(Import) / sum(Production) > .MIN_TRADE_PROD_RATIO) %>%
        # finally,
        # use average gross trade value to calculate trade scaler
        # the trade scalers will be applied to all regions
        mutate(ExportScaler = (sum(Export) + sum(Import)) / 2 / sum(Export),
               ImportScaler = (sum(Export) + sum(Import)) / 2 / sum(Import)) %>%
        select(all_of(JOIN_VAR), ExportScaler, ImportScaler) %>%
        ungroup(),
      by = JOIN_VAR) %>%
    # Rows whose group was filtered out above have no scaler; zero removes their trade
    replace_na(list(ExportScaler = 0, ImportScaler = 0)) %>%
    # If world export, import, or prod is 0, trade will be zero
    mutate(value = case_when(
      element %in% c("Export") ~ value * ExportScaler,
      element %in% c("Import") ~ value * ImportScaler,
      TRUE ~ value)) %>%
    select(-ExportScaler, -ImportScaler)

}
#GROSS_TRADE_ADJUST function moved to helper functions



# 3. Process items in FAO_items to get Balanced SUA data ----
Expand Down
1 change: 0 additions & 1 deletion R/xfaostat_L201_Forestry.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ module_xfaostat_L201_Forestry <- function(command, ...) {
unit) %>%
filter(!is.na(value)) %>%
rm_accent("item", "area") -> FO1
rm(FO)

# 215 unique areas with production data
FO_area <-
Expand Down
3 changes: 1 addition & 2 deletions R/xfaostat_L401_Fertilizer.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#' @importFrom tibble tibble
#' @importFrom tidyr complete drop_na gather nesting spread replace_na fill
#' @author XZ 2023
module_xfaostatL401_Fertilizer <- function(command, ...) {
module_xfaostat_L401_Fertilizer <- function(command, ...) {

MODULE_INPUTS <-
c(FILE = "aglu/FAO/FAO_an_items_PRODSTAT",
Expand Down Expand Up @@ -59,7 +59,6 @@ module_xfaostatL401_Fertilizer <- function(command, ...) {
complete(nesting(area_code, area), nesting(item_code, item), nesting(element_code, element, unit), year) %>%
rm_accent("item", "area") -> RFN1

rm(RFN) # clean

## RFN1_Production ----
RFN1 %>%
Expand Down
146 changes: 71 additions & 75 deletions R/xfaostat_helper_funcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,6 @@ FAOSTAT_metadata <- function (code = NULL){
return(metadata)
}

# Download the FAOSTAT bulk file of each requested dataset into a local folder.
#   DATASETCODE: character vector of dataset codes; each one is passed to
#                FAOSTAT_metadata(code = ) to look up its `filelocation` URL
#   DATA_FOLDER: existing folder the files are written to (default DIR_RAW_DATA_FAOSTAT);
#                each file keeps the base name of its URL
# Returns, invisibly, the list of download.file() results (one per dataset code).
FAOSTAT_download_bulk <- function(DATASETCODE,
                                  DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){

  assertthat::assert_that(is.character(DATASETCODE))
  assertthat::assert_that(is.character(DATA_FOLDER))
  # download.file() cannot create folders; fail before any metadata request is made
  assertthat::assert_that(all(dir.exists(DATA_FOLDER)),
                          msg = "DATA_FOLDER does not exist")

  invisible(lapply(DATASETCODE, function(d){
    metadata <- FAOSTAT_metadata(code = d)
    url_bulk <- metadata$filelocation

    file_name <- basename(url_bulk)
    download.file(url_bulk, file.path(DATA_FOLDER, file_name))
  }))

}



Expand Down Expand Up @@ -87,34 +71,6 @@ FAOSTAT_load_raw_data <- function(DATASETCODE,
}


#remove accent and apostrophe for cols in a df
#' rm_accent: Remove accent and prime in selected columns of a data frame
#'
#' Each selected column is converted with \code{iconv(to = "ASCII//TRANSLIT")}
#' and then stripped of apostrophes. Other columns are left untouched.
#'
#' @param .df Input data frame
#' @param ... A character set of column names
#' @importFrom magrittr %>%
#' @importFrom assertthat assert_that
#' @importFrom dplyr intersect mutate_at
#'
#' @return A data frame with accent and prime removed
#' @export

rm_accent <- function(.df, ...){

  cols <- c(...)

  # %in% (unlike comparing lengths of an intersect) also accepts repeated names
  assertthat::assert_that(
    all(cols %in% names(.df)),
    msg = "Columns listed not included in the data frame")

  # No column requested: nothing to clean
  if (length(cols) == 0) {
    return(.df)
  }

  # One pass per column: transliterate first, then drop apostrophes.
  # An explicit function is used because passing extra arguments through
  # across(...) is deprecated in dplyr.
  dplyr::mutate(
    .df,
    dplyr::across(
      dplyr::all_of(cols),
      function(x) gsub(pattern = "\\'", replacement = "",
                       x = iconv(x, to = "ASCII//TRANSLIT"))))

}




Expand Down Expand Up @@ -313,7 +269,16 @@ FF_FILL_NUMERATOR_DENOMINATOR <- function(.DF, NUMERATOR_c, DENOMINATOR_c,
}


# A function to full join data frame to check mappings with common code
#' FF_join_checkmap: full-join data frames by a common COL_by variable to check mappings
#'
#' @param DFs Data frames to be full joined.
#' @param COL_by By variable in join.
#' @param COL_rename Other common variables to rename (by adding df names as prefix) before the join.
#' @importFrom magrittr %>%
#' @importFrom dplyr rename_at select any_of all_of full_join
#' @importFrom purrr reduce
#' @return A joined data frame
#' @export
FF_join_checkmap <- function(DFs, COL_by, COL_rename){
lapply(DFs, function(df){

Expand All @@ -323,7 +288,17 @@ FF_join_checkmap <- function(DFs, COL_by, COL_rename){
}) %>% purrr:: reduce(full_join, by = COL_by)
}

# Count item_code and area_code by year
#' FF_check_count_plot: count item_code and area_code by year
#'
#' @param .DF Input data frame
#' @param .ELEMENT A set of elements (in Char) to focus. If empty, all elements are summarized
#' @importFrom dplyr summarize
#' @importFrom magrittr %>%
#' @importFrom tidyr gather
#' @importFrom ggplot2 ggplot aes facet_wrap geom_line theme_bw
#'
#' @return A plot summarizing the time-series of changing the count of item_code and area_code (grouped by element).
#' @export
FF_check_count_plot <- function(.DF, .ELEMENT = c()){
if (.ELEMENT %>% length() == 0 ) {
.DF %>% distinct(element) %>% pull -> .ELEMENT
Expand All @@ -338,6 +313,34 @@ FF_check_count_plot <- function(.DF, .ELEMENT = c()){
theme_bw()
}

#remove accent and apostrophe for cols in a df
#' rm_accent: Remove accent and prime in selected columns of a data frame
#'
#' Each selected column is converted with \code{iconv(to = "ASCII//TRANSLIT")}
#' and then stripped of apostrophes. Other columns are left untouched.
#'
#' @param .df Input data frame
#' @param ... A character set of column names
#' @importFrom magrittr %>%
#' @importFrom assertthat assert_that
#' @importFrom dplyr intersect mutate_at
#'
#' @return A data frame with accent and prime removed
#' @export

rm_accent <- function(.df, ...){

  cols <- c(...)

  # %in% (unlike comparing lengths of an intersect) also accepts repeated names
  assertthat::assert_that(
    all(cols %in% names(.df)),
    msg = "Columns listed not included in the data frame")

  # No column requested: nothing to clean
  if (length(cols) == 0) {
    return(.df)
  }

  # One pass per column: transliterate first, then drop apostrophes.
  # An explicit function is used because passing extra arguments through
  # across(...) is deprecated in dplyr.
  dplyr::mutate(
    .df,
    dplyr::across(
      dplyr::all_of(cols),
      function(x) gsub(pattern = "\\'", replacement = "",
                       x = iconv(x, to = "ASCII//TRANSLIT"))))

}

assert_FBS_balance <- function(.DF){


Expand Down Expand Up @@ -441,33 +444,6 @@ SUA_bal_adjust <- function(.df){



#' FAOSTAT_check_count_plot: count item_code and area_code by year
#'
#' Counts the distinct area_code ("Country") and item_code ("Item") values for
#' every year and element, then draws one line per element in a panel per count.
#'
#' @param .DF Input data frame with year, element, area_code, and item_code columns
#' @param .ELEMENT Character vector of elements to keep. If empty, every element found in .DF is plotted
#' @importFrom dplyr summarize
#' @importFrom magrittr %>%
#' @importFrom tidyr gather
#' @importFrom ggplot2 ggplot aes facet_wrap geom_line theme_bw
#'
#' @return A ggplot object showing the yearly counts of item_code and area_code, colored by element.
#' @export

FAOSTAT_check_count_plot <- function(.DF, .ELEMENT = c()){

  # Fall back to all elements present in the data when none is requested
  if (length(.ELEMENT) == 0) {
    .ELEMENT <- .DF %>% distinct(element) %>% pull
  }

  # Distinct areas and items per year and element, stacked into long format
  count_long <- .DF %>%
    group_by(year, element) %>%
    summarise(Country = length(unique(area_code)),
              Item = length(unique(item_code)), .groups = "drop") %>%
    gather(header, count, -year, -element) %>%
    filter(element %in% .ELEMENT)

  # One free-scaled panel per count type
  ggplot(count_long) +
    facet_wrap(~header, scales = "free") +
    geom_line(aes(x = year, y = count, color = element)) +
    theme_bw()
}



#' Function saving dataset to csv file with headers
#'
Expand Down Expand Up @@ -520,10 +496,12 @@ output_csv_data <- function(gcam_dataset, col_type_nonyear,

#' Balance gross trade
#' @description Scale gross export and import in all regions to make them equal at the world level.
#'
#' @param .DF An input dataframe with an element col including Import and Export
#' @param .MIN_TRADE_PROD_RATIO Trade will be removed if world total export or import over production is smaller than .MIN_TRADE_PROD_RATIO (1% default value)
#' @param .Reg_VAR Region variable name; default is ("area_code")
#' @param .GROUP_VAR Group variable; default is ("item_code", "year")
#' @param .Reg_VAR Region variable name; default is area_code
#' @param .GROUP_VAR Group variable; default is item_code and year
#'
#' @return The same dataframe with balanced world export and import.

GROSS_TRADE_ADJUST <- function(.DF,
Expand Down Expand Up @@ -572,6 +550,24 @@ GROSS_TRADE_ADJUST <- function(.DF,
}


# Download the FAOSTAT bulk file of each requested dataset into a local folder.
#   DATASETCODE: character vector of dataset codes; each one is passed to
#                FAOSTAT_metadata(code = ) to look up its `filelocation` URL
#   DATA_FOLDER: existing folder the files are written to (default DIR_RAW_DATA_FAOSTAT);
#                each file keeps the base name of its URL
# Returns, invisibly, the list of download.file() results (one per dataset code).
FAOSTAT_download_bulk <- function(DATASETCODE,
                                  DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){

  assertthat::assert_that(is.character(DATASETCODE))
  assertthat::assert_that(is.character(DATA_FOLDER))
  # download.file() cannot create folders; fail before any metadata request is made
  assertthat::assert_that(all(dir.exists(DATA_FOLDER)),
                          msg = "DATA_FOLDER does not exist")

  invisible(lapply(DATASETCODE, function(d){
    metadata <- FAOSTAT_metadata(code = d)
    url_bulk <- metadata$filelocation

    file_name <- basename(url_bulk)
    download.file(url_bulk, file.path(DATA_FOLDER, file_name))
  }))

}


# Label formatter for ggplot: renders numbers as strings with no decimal places
scaleFUN <- function(x) {
  sprintf(fmt = "%.0f", x)
}

Expand Down
2 changes: 1 addition & 1 deletion docs/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ pandoc: 3.1.1
pkgdown: 2.0.7
pkgdown_sha: ~
articles: {}
last_built: 2023-08-08T17:50Z
last_built: 2023-08-08T19:05Z

Loading

0 comments on commit 9ffae91

Please sign in to comment.