From 1f1f168614f093e9c02948d4bd9681c1886dc347 Mon Sep 17 00:00:00 2001 From: "Zhao, Xin" Date: Mon, 7 Aug 2023 13:58:01 -0400 Subject: [PATCH] quick naming updates --- R/xfaostat_L100_constants.R | 3 +- R/xfaostat_L101_RawDataPreProcessing3.R | 171 ++++++++++++++++++++++++ R/xfaostat_L105_DataConnectionToSUA.R | 7 +- xfaostat_L101_RawDataPreProcessing3.R | 149 --------------------- xfaostat_L200_ExportCSV.R | 143 ++++++++++++++++++++ 5 files changed, 319 insertions(+), 154 deletions(-) create mode 100644 R/xfaostat_L101_RawDataPreProcessing3.R delete mode 100644 xfaostat_L101_RawDataPreProcessing3.R create mode 100644 xfaostat_L200_ExportCSV.R diff --git a/R/xfaostat_L100_constants.R b/R/xfaostat_L100_constants.R index 5c4607f5..ced19286 100644 --- a/R/xfaostat_L100_constants.R +++ b/R/xfaostat_L100_constants.R @@ -7,7 +7,8 @@ DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata") - + # Output GCAM csv + DIR_OUT_CSV <- "inst/extdata/aglu/FAO" # Balance elements; used in Get_SUA_TEMPLATE and SUA_bal_adjust diff --git a/R/xfaostat_L101_RawDataPreProcessing3.R b/R/xfaostat_L101_RawDataPreProcessing3.R new file mode 100644 index 00000000..20804062 --- /dev/null +++ b/R/xfaostat_L101_RawDataPreProcessing3.R @@ -0,0 +1,171 @@ +# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. + +#' module_xfaostat_L101_RawDataPreProcessing3 +#' +#' Preprocess raw faostat data +#' +#' @param command API command to execute +#' @param ... other optional parameters, depending on command +#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if +#' \code{command} is "MAKE") all the generated outputs +#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities. 
+#' @importFrom assertthat assert_that +#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at +#' first case_when vars +#' @importFrom tibble tibble +#' @importFrom tidyr complete drop_na gather nesting spread replace_na +#' @author XZ 2023 +module_xfaostat_L101_RawDataPreProcessing3 <- function(command, ...) { + + MODULE_INPUTS <- + c(FILE = "aglu/AGLU_ctry", + "QCL_area_code_map") + + MODULE_OUTPUTS <- + c("TCL", "TM_wide" # Gross and bilateral trade + ) + + if(command == driver.DECLARE_INPUTS) { + return(MODULE_INPUTS) + } else if(command == driver.DECLARE_OUTPUTS) { + return(MODULE_OUTPUTS) + } else if(command == driver.MAKE) { + + year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check. + + all_data <- list(...)[[1]] + + # Load required inputs ---- + + get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE) + + + + FAOSTAT_RDS <- c("TCL", "TM_wide") + + DIR_PREBUILT_FAOSTAT <- "data/PREBUILT_FAOSTAT" + + lapply(FAOSTAT_RDS, function(d){ + assertthat::assert_that(file.exists(file.path(DIR_PREBUILT_FAOSTAT, paste0(d, ".rds")))) + assign(d, readRDS(file.path(DIR_PREBUILT_FAOSTAT, paste0(d, ".rds"))), + envir = parent.env(environment())) + }) + + ### output TCL and clean memory ---- + TCL %>% + add_title("FAO TCL") %>% + add_units("tonne") %>% + add_comments("Preprocessed FAO TCL") -> + TCL + + TM_wide %>% + add_title("FAO TM") %>% + add_units("tonne") %>% + add_comments("Preprocessed FAO TM_wide") -> + TM_wide + + +# +# QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull() +# +# # *[TCL] Gross trade ---- +# FAOSTAT_load_raw_data("TCL") # Gross trade +# +# TCL %>% distinct(element, element_code, unit) +# TCL %>% distinct(item, item_code) +# +# TCL %>% +# filter(item_code <= 1700, +# # only keep quantity +# !element_code %in% c(5622 , 5922), +# area_code %in% QCL_area_code) %>% +# select(area_code, area, item_code, item, element_code, element, year, value, 
unit) %>% +# rm_accent("item", "area") -> TCL1 +# +# ### output TCL and clean memory ---- +# TCL1 %>% +# add_title("FAO TCL") %>% +# add_units("tonne") %>% +# add_comments("Preprocessed FAO TCL") -> +# TCL +# +# rm(TCL1) +# +# +# +# # *[TM] Bilateral trade ---- +# #*FAO has better quality bilateral data since 1992, covering most SUA items +# FAOSTAT_load_raw_data("TM") # Bilateral trade +# +# TM %>% +# # Only keep quantities for elements with a unit of tonnes +# filter(element_code %in% c(5910, 5610), +# item_code < 1700, +# # Bilateral trade year starts from 1986 but higher quality after 1992 +# # Subset data also to shrink the size +# year >= 1992, +# partner_country_code %in% QCL_area_code, +# reporter_country_code %in% QCL_area_code) %>% +# select(reporter_country_code, reporter_countries, +# partner_country_code, partner_countries, +# item_code, item, element_code, element, year, value, unit) -> +# TM1 +# rm(TM) +# +# +# ## **Reconcile export and import bilateral flow ---- +# # Full join export and import and use available import to fill missing and zero export +# TM1 %>% filter(element %in% c("Export Quantity")) %>% spread(element, value) %>% +# select(exporter = reporter_country_code, +# importer = partner_country_code, item_code, year, expflow = `Export Quantity`) %>% +# full_join( +# TM1 %>% filter(element %in% c("Import Quantity")) %>% spread(element, value)%>% +# select(importer = reporter_country_code, +# exporter = partner_country_code, item_code, year, impflow = `Import Quantity`), +# by = c("exporter", "importer", "item_code", "year") +# ) %>% +# # replace na with zero but use import to replace zero export later +# replace_na(list(expflow = 0, impflow = 0)) %>% +# transmute(area_code = importer, year, item_code, source_code = exporter, +# value = if_else(expflow == 0, impflow, expflow)) %>% +# mutate(element = "Import Quantity") -> +# TM2 +# +# +# TM2 %>% +# # remove self-trade (per unaggregated area_code) which existed in FAO TM importing data and 
likely due to data processing mistakes. +# filter(area_code != source_code) %>% +# left_join(TM1 %>% distinct(item, item_code), by = c("item_code")) %>% +# left_join(TM1 %>% distinct(area = partner_countries, area_code = partner_country_code), by = c("area_code")) %>% +# left_join(TM1 %>% distinct(source = partner_countries, source_code = partner_country_code), by = c("source_code")) %>% +# rm_accent("item", "area", "source") %>% +# mutate(unit = "tonnes") -> +# TM3 +# rm(TM1, TM2) +# +# TM3 %>% spread(year, value) -> TM4 +# +# ### output OA and clean memory ---- +# TM4 %>% +# add_title("FAO TM") %>% +# add_units("tonne") %>% +# add_comments("Preprocessed FAO TM") -> +# TM +# +# rm(TM3, TM4) +# rm(QCL_area_code) +# +# ### output TM and clean memory ---- +# TM %>% +# add_title("FAO TM") %>% +# add_units("tonne") %>% +# add_comments("Preprocessed FAO TM_wide") -> +# TM_wide + + + return_data(MODULE_OUTPUTS) + + } else { + stop("Unknown command") + } +} diff --git a/R/xfaostat_L105_DataConnectionToSUA.R b/R/xfaostat_L105_DataConnectionToSUA.R index 51acda3d..902d3173 100644 --- a/R/xfaostat_L105_DataConnectionToSUA.R +++ b/R/xfaostat_L105_DataConnectionToSUA.R @@ -22,7 +22,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) { FILE = "aglu/FAO/Mapping_FBSH_SCL_OilCake") MODULE_OUTPUTS <- - c("GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019") + c("Bal_new_all") if(command == driver.DECLARE_INPUTS) { return(MODULE_INPUTS) @@ -759,11 +759,10 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) 
{ ### output GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019 and clean memory ---- Bal_new_all %>% - spread(year, value) %>% - add_title("GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019") %>% + add_title("Bal_new_all") %>% add_units("Ktonne") %>% add_comments("Preprocessed FAO SUA 2010 - 2019") -> - GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019 + Bal_new_all return_data(MODULE_OUTPUTS) diff --git a/xfaostat_L101_RawDataPreProcessing3.R b/xfaostat_L101_RawDataPreProcessing3.R deleted file mode 100644 index a57d6c82..00000000 --- a/xfaostat_L101_RawDataPreProcessing3.R +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. - -#' module_xfaostat_L101_RawDataPreProcessing3 -#' -#' Preprocess raw faostat data -#' -#' @param command API command to execute -#' @param ... other optional parameters, depending on command -#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if -#' \code{command} is "MAKE") all the generated outputs -#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities. -#' @importFrom assertthat assert_that -#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at -#' first case_when vars -#' @importFrom tibble tibble -#' @importFrom tidyr complete drop_na gather nesting spread replace_na -#' @author XZ 2023 -module_xfaostat_L101_RawDataPreProcessing3 <- function(command, ...) { - - MODULE_INPUTS <- - c(FILE = "aglu/AGLU_ctry", - "QCL_area_code_map") - - MODULE_OUTPUTS <- - c("TCL", "TM" # Gross and bilateral trade - ) - - if(command == driver.DECLARE_INPUTS) { - return(MODULE_INPUTS) - } else if(command == driver.DECLARE_OUTPUTS) { - return(MODULE_OUTPUTS) - } else if(command == driver.MAKE) { - - year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check. 
- - all_data <- list(...)[[1]] - - # Load required inputs ---- - - get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE) - - #source("data-raw/generate_package_data_faostat_helper_funcs.R") - #DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata.faostat") - - - QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull() - - - # *[TCL] Gross trade ---- - FAOSTAT_load_raw_data("TCL") # Gross trade - - TCL %>% distinct(element, element_code, unit) - TCL %>% distinct(item, item_code) - - TCL %>% - filter(item_code <= 1700, - # only keep quantity - !element_code %in% c(5622 , 5922), - area_code %in% QCL_area_code) %>% - select(area_code, area, item_code, item, element_code, element, year, value, unit) %>% - rm_accent("item", "area") -> TCL1 - - ### output TCL and clean memory ---- - TCL1 %>% - add_title("FAO TCL") %>% - add_units("tonne") %>% - add_comments("Preprocessed FAO TCL") -> - TCL - - rm(TCL1) - - - - # *[TM] Bilateral trade ---- - #*FAO has better quality bilateral data since 1992, covering most SUA items - FAOSTAT_load_raw_data("TM") # Bilateral trade - - TM %>% - # Only keep quantities for elements with a unit of tonnes - filter(element_code %in% c(5910, 5610), - item_code < 1700, - # Bilateral trade year starts from 1986 but higher quality after 1992 - # Subset data also to shrink the size - year >= 1992, - partner_country_code %in% QCL_area_code, - reporter_country_code %in% QCL_area_code) %>% - select(reporter_country_code, reporter_countries, - partner_country_code, partner_countries, - item_code, item, element_code, element, year, value, unit) -> - TM1 - rm(TM) - - - ## **Reconcile export and import bilateral flow ---- - # Full join export and import and use available import to fill missing and zero export - TM1 %>% filter(element %in% c("Export Quantity")) %>% spread(element, value) %>% - select(exporter = reporter_country_code, - importer = partner_country_code, item_code, year, expflow = `Export 
Quantity`) %>% - full_join( - TM1 %>% filter(element %in% c("Import Quantity")) %>% spread(element, value)%>% - select(importer = reporter_country_code, - exporter = partner_country_code, item_code, year, impflow = `Import Quantity`), - by = c("exporter", "importer", "item_code", "year") - ) %>% - # replace na with zero but use import to replace zero export later - replace_na(list(expflow = 0, impflow = 0)) %>% - transmute(area_code = importer, year, item_code, source_code = exporter, - value = if_else(expflow == 0, impflow, expflow)) %>% - mutate(element = "Import Quantity") -> - TM2 - - - TM2 %>% - # remove self-trade (per unaggregated area_code) which existed in FAO TM importing data and likely due to data processing mistakes. - filter(area_code != source_code) %>% - left_join(TM1 %>% distinct(item, item_code), by = c("item_code")) %>% - left_join(TM1 %>% distinct(area = partner_countries, area_code = partner_country_code), by = c("area_code")) %>% - left_join(TM1 %>% distinct(source = partner_countries, source_code = partner_country_code), by = c("source_code")) %>% - rm_accent("item", "area", "source") %>% - mutate(unit = "tonnes") -> - TM3 - rm(TM1, TM2) - - TM3 %>% spread(year, value) -> TM4 - - ### output OA and clean memory ---- - TM4 %>% - add_title("FAO TM") %>% - add_units("tonne") %>% - add_comments("Preprocessed FAO TM") -> - TM - - rm(TM3, TM4) - rm(QCL_area_code) - - ### output OA and clean memory ---- - TM %>% - add_title("FAO TM") %>% - add_units("tonne") %>% - add_comments("Preprocessed FAO TM_wide") -> - TM_wide - - - return_data(MODULE_OUTPUTS) - - } else { - stop("Unknown command") - } -} diff --git a/xfaostat_L200_ExportCSV.R b/xfaostat_L200_ExportCSV.R new file mode 100644 index 00000000..4cc8db7f --- /dev/null +++ b/xfaostat_L200_ExportCSV.R @@ -0,0 +1,143 @@ +# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. 
+
+#' module_xfaostat_L200_ExportCSV
+#'
+#' Export processed FAOSTAT tables (SUA balance, FBSH/CB history, macronutrient rates) via \code{output_csv_data}
+#'
+#' @param command API command to execute
+#' @param ... other optional parameters, depending on command
+#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
+#' \code{command} is "MAKE") all the generated outputs
+#' @details Drops zero values from \code{Bal_new_all}; runs \code{FBSH_CB} through \code{FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL(SUDAN2012_MERGE = TRUE)}, divides values by 1000 and spreads years up to 2009 into columns; copies area 276 macronutrient rates to area 206 when 206 is absent. Each table is passed to \code{output_csv_data} with \code{out_dir = DIR_OUT_CSV}.
+#' @importFrom assertthat assert_that
+#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at
+#' first case_when vars transmute
+#' @importFrom tibble tibble
+#' @importFrom tidyr complete drop_na gather nesting spread replace_na
+#' @author XZ 2022
+module_xfaostat_L200_ExportCSV <- function(command, ...) {
+
+  MODULE_INPUTS <-
+    c("Bal_new_all",
+      "FBSH_CB",
+      "SUA_food_macronutrient_rate")
+
+  MODULE_OUTPUTS <-
+    c("GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019",
+      "GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean",
+      "GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009")
+
+  if(command == driver.DECLARE_INPUTS) {
+    return(MODULE_INPUTS)
+  } else if(command == driver.DECLARE_OUTPUTS) {
+    return(MODULE_OUTPUTS)
+  } else if(command == driver.MAKE) {
+
+    area_code <- element <- item_code <- value <- year <- NULL    # silence package check.
+
+    all_data <- list(...)[[1]]
+
+    # Load required inputs ----
+    # NOTE(review): get_data_list presumably binds each MODULE_INPUTS entry as a local of the same name - confirm.
+    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)
+
+
+    ## *GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019 ----
+    # Keep only non-zero records and the five columns exported (year stays a column, not spread).
+    Bal_new_all %>% filter(value != 0.0) %>%
+      transmute(area_code, item_code, element, year, value) ->
+      GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019
+
+    output_csv_data("GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019",
+                    col_type_nonyear = "iifin",
+                    title = "Supply_utilization_accounting for all FAO items in 2010 to 2019",
+                    unit = "1000 tonnes", code = "SCL",
+                    description = "Data is compiled and generated by gcamdata-FAOSTAT. Data is balanced in trade, supply_utilization, and storage",
+                    out_dir = DIR_OUT_CSV, GZIP = TRUE)
+
+
+    # ## *GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020 ----
+    #
+    # FF_load_RDS("TM_bilateral", DATA_FOLDER_PROC = DIR_PROCESSED_RDS)
+    #
+    #
+    # TM_bilateral %>% filter(year >= min(Hist_Year_FBS)) %>%
+    #   FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL(SUDAN2012_MERGE = T) %>%
+    #   # merge Sudan and South Sudan
+    #   FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL(.FAO_AREA_CODE_COL = "source_code",
+    #                                              .AREA_COL = "source",
+    #                                              SUDAN2012_MERGE = T) ->
+    #   TM_bilateral1
+    # TM_bilateral1 %>% filter(value != 0.0) %>%
+    #   transmute(area_code, item_code, source_code, year, value) ->
+    #   GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020
+    #
+    #
+    # output_csv_data("GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020",
+    #                 col_type_nonyear = "iiiin",
+    #                 title = "BiTrade for all available FAO items in 2010 to 2019",
+    #                 unit = "1000 tonnes", code = "TM",
+    #                 description = "Data is compiled and generated by gcamdata-FAOSTAT. Bilateral trade data.",
+    #                 out_dir = DIR_OUT_CSV, GZIP = T)
+    #
+
+
+
+    ## *GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009 ----
+
+    FBSH_CB %>%
+      # merge Sudan and South Sudan
+      FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL(SUDAN2012_MERGE = TRUE) ->
+      FBSH_CB
+
+    # Divide values by 1000 (unit relabelled "1000 tonnes"), keep years through 2009, then spread years into columns.
+    FBSH_CB %>% mutate(unit = "1000 tonnes", value = value / 1000) %>%
+      filter(year <= 2009) %>%
+      filter(!is.na(year)) %>%
+      spread(year, value) ->
+      GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009
+
+
+    output_csv_data("GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009",
+                    col_type_nonyear = "iiccc",
+                    title = "Old FAO food balance sheet in primary equilvalent in 1973 to 2009",
+                    unit = "1000 tonnes", code = "FBSH",
+                    description = "Data is compiled and generated by gcamdata-FAOSTAT. FBSH and CB include old food and nonfood balances.",
+                    out_dir = DIR_OUT_CSV, GZIP = TRUE)
+    # ******************************----
+
+    # Macronutrient ----
+
+    ## *GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean----
+
+
+    SUA_food_macronutrient_rate ->
+      GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean
+
+
+    # Fix Sudan with code 206 and 276 after 2012: when area 206 is absent, duplicate the area 276 rows under code 206
+    if (!(206 %in% GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean$area_code)) {
+      GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>%
+        filter(area_code == 276) %>% mutate(area_code = 206) %>%
+        bind_rows(GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean) ->
+        GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean
+    }
+
+
+    output_csv_data("GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean",
+                    col_type_nonyear = "iicnnn",
+                    title = "Macronutrient conversion factor for food items, mean values of 2010 to 2019",
+                    unit = "calories per g or (fat or protein) percentage", code = "SCL",
+                    description = "Data is compiled and generated by gcamdata-FAOSTAT. Macronutrient conversion factor for all available FAO food items.",
+                    out_dir = DIR_OUT_CSV, GZIP = TRUE)
+
+
+
+
+    return_data(MODULE_OUTPUTS)
+
+
+  } else {
+    stop("Unknown command")
+  }
+}