Skip to content

Commit

Permalink
Update prebuilt structure and fixes driver() by improving precursors
Browse files Browse the repository at this point in the history
  • Loading branch information
realxinzhao committed Aug 16, 2023
1 parent 536d36e commit 5e66145
Show file tree
Hide file tree
Showing 9 changed files with 125 additions and 110 deletions.
3 changes: 2 additions & 1 deletion R/constants.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ OUTPUT_Export_CSV <- TRUE
## Fao raw data folder
DIR_RAW_DATA_FAOSTAT <- "inst/extdata/aglu/FAO/FAOSTAT"
## Output GCAM csv
DIR_OUTPUT_CSV <- "inst/extdata/aglu/FAO/temp"
#DIR_OUTPUT_CSV <- "inst/extdata/aglu/FAO/temp"
DIR_OUTPUT_CSV <- "outputs/CSV"
dir.create(file.path(DIR_OUTPUT_CSV), showWarnings = FALSE)


Expand Down
6 changes: 3 additions & 3 deletions R/xfaostat_L101_RawDataPreProc2_PP_PD_OA.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {
FILE = "aglu/FAO/FAOSTAT/Other_supplementary/GDP_deflator_Taiwan",
"QCL_area_code_map")

MODULE_OUTPUTS <-

MODULE_OUTPUTS <-
c("PP_wide", # Producer prices
"PD", # GDP deflator
"OA") # Population
Expand All @@ -44,7 +45,6 @@ module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {
get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)



if(Process_Raw_FAO_Data == FALSE) {

# Prebuilt data is read here ----
Expand Down Expand Up @@ -175,7 +175,7 @@ module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {
add_comments("Preprocessed FAOSTAT regional gdp deflators") %>%
add_precursors("QCL_area_code_map",
"aglu/FAO/FAOSTAT/Deflators_E_All_Data_(Normalized)_PalceHolder",
"aglu/fao/FAOSTAT/Other_supplementary/GDP_deflator_Taiwan") ->
"aglu/FAO/FAOSTAT/Other_supplementary/GDP_deflator_Taiwan") ->
PD

verify_identical_prebuilt(PD)
Expand Down
22 changes: 12 additions & 10 deletions R/xfaostat_L101_RawDataPreProc6_TM.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,23 @@ module_xfaostat_L101_RawDataPreProc6_TM <- function(command, ...) {

} else {

# Get area code ----
QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull()

FAOSTAT_RDS <- c("TM_bilateral_wide")

FAOSTAT_RDS <- c("TM_bilateral_wide")
DIR_PREBUILT_FAOSTAT <- "data"

DIR_PREBUILT_FAOSTAT <- "data/PREBUILT_FAOSTAT"
lapply(FAOSTAT_RDS, function(d){
assertthat::assert_that(file.exists(file.path(DIR_PREBUILT_FAOSTAT, paste0(d, ".rds"))))
assign(d, readRDS(file.path(DIR_PREBUILT_FAOSTAT, paste0(d, ".rds"))),
envir = parent.env(environment()))
})

lapply(FAOSTAT_RDS, function(d){
assertthat::assert_that(file.exists(file.path(DIR_PREBUILT_FAOSTAT, paste0(d, ".rds"))))
assign(d, readRDS(file.path(DIR_PREBUILT_FAOSTAT, paste0(d, ".rds"))),
envir = parent.env(environment()))
})


# # Get area code ----
# QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull()
#
#
# # *[TM] Bilateral trade ----
# #*FAO has better quality bilateral data since 1992, covering most SUA items
# FAOSTAT_load_raw_data("TM") # Bilateral trade
Expand Down Expand Up @@ -123,7 +125,7 @@ module_xfaostat_L101_RawDataPreProc6_TM <- function(command, ...) {
### output TM ----

TM_bilateral_wide %>%
add_title("FAO bilateral trade (TM) ") %>%
add_title("FAO bilateral trade (TM)", overwrite = T) %>%
add_units("tonne") %>%
add_comments("Preprocessed FAO TM_wide") %>%
add_precursors("aglu/FAO/FAOSTAT/Trade_DetailedTradeMatrix_E_All_Data_(Normalized)_PalceHolder",
Expand Down
12 changes: 5 additions & 7 deletions R/xfaostat_L102_ProductionArea.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,12 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {

# wide to long
QCL_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
FAOSTAT_AREA_RM_NONEXIST() -> QCL

FBS_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
FAOSTAT_AREA_RM_NONEXIST() -> FBS

FBSH_CB_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
FAOSTAT_AREA_RM_NONEXIST() -> FBSH_CB


Expand Down Expand Up @@ -372,13 +369,14 @@ module_xfaostat_L102_ProductionArea <- function(command, ...) {
FBS_FISH <-
FBS %>% filter(year >= 2010, element_code == 5511, # production
item_code %in% c(FBS_COMM_FISH %>% pull(item_code))) %>%
replace_na(list(value = 0)) %>%
bind_rows(
FBSH_CB %>% filter(year < 2010, element_code == 5511, # production
item_code %in% c(FBS_COMM_FISH %>% pull(item_code)))
) %>% mutate(value = value *1000,
unit = "tonnes",
element_code = 5510 # changed here for consistency
) %>%
replace_na(list(value = 0)) %>%
mutate(value = value *1000,
unit = "tonnes",
element_code = 5510 # changed here for consistency
) %>%
select(area_code, area, item_code, item, element, element_code, year, value, unit) %>%
FAOSTAT_AREA_RM_NONEXIST
Expand Down
45 changes: 39 additions & 6 deletions R/xfaostat_L199_CSVExportAgSUA.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
module_xfaostat_L199_ExportCSV <- function(command, ...) {

MODULE_INPUTS <-
c("Bal_new_all",
c("PD",
"Bal_new_all",
"FBSH_CB_wide",
"QCL_PROD",
"QCL_AN_LIVEANIMAL",
Expand All @@ -27,11 +28,10 @@ module_xfaostat_L199_ExportCSV <- function(command, ...) {
"QCL_FODDERCROP",
"QCL_PRIMARY_PROD_PV",
"TM_bilateral_wide",
"PD",
"SUA_food_macronutrient_rate")

MODULE_OUTPUTS <-
c("xfaostat_L199_DUMMY")
c("xfaostat_L199_GCAMDATA_FAOSTAT_CSV")

if(command == driver.DECLARE_INPUTS) {
return(MODULE_INPUTS)
Expand All @@ -45,7 +45,36 @@ module_xfaostat_L199_ExportCSV <- function(command, ...) {


# adding dummy output ----
xfaostat_L199_DUMMY <- data.frame()
xfaostat_L199_GCAMDATA_FAOSTAT_CSV <-
tibble(CSV_export = c("GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019",
"GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020",
"GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009",
"GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020",
"GCAMDATA_FAOSTAT_ProdArea_96Regs_16FodderItems_1973to2020",
"GCAMDATA_FAOSTAT_AnimalStock_202Regs_22Items_1973to2020",
"GCAMDATA_FAOSTAT_ProducerPrice_170Regs_185PrimaryItems_2010to2020",
"FAO_GDP_Deflators",
"GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean"
))

xfaostat_L199_GCAMDATA_FAOSTAT_CSV %>%
add_title("Export CSV to DIR_OUTPUT_CSV") %>%
add_units("NA") %>%
add_comments("Export CSV") %>%
add_precursors("PD",
"Bal_new_all",
"FBSH_CB_wide",
"QCL_PROD",
"QCL_AN_LIVEANIMAL",
"QCL_AN_PRIMARY_MILK",
"QCL_CROP_PRIMARY",
"QCL_FODDERCROP",
"QCL_PRIMARY_PROD_PV",
"TM_bilateral_wide",
"SUA_food_macronutrient_rate") ->
xfaostat_L199_GCAMDATA_FAOSTAT_CSV



if (OUTPUT_Export_CSV == T) {
# Load required inputs ----
Expand All @@ -60,7 +89,12 @@ module_xfaostat_L199_ExportCSV <- function(command, ...) {

TM_bilateral_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
filter(value > 0) -> TM_bilateral
filter(value > 0) %>%
FAOSTAT_AREA_RM_NONEXIST() %>%
rename(area_code1 = area_code, area_code = source_code) %>%
FAOSTAT_AREA_RM_NONEXIST() %>%
rename(source_code = area_code, area_code = area_code1) ->
TM_bilateral

TM_bilateral %>%
# only export quality data years
Expand Down Expand Up @@ -89,7 +123,6 @@ module_xfaostat_L199_ExportCSV <- function(command, ...) {
)



# SUA and FBS ----
## *GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019 ----

Expand Down
71 changes: 49 additions & 22 deletions R/xfaostat_helper_funcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,41 +32,69 @@ FAOSTAT_metadata <- function (code = NULL){

#' FAOSTAT_load_raw_data: load raw csv data
#' @description Read csv data and "." in column name is substituted with "_".
#'
#' @param DATASETCODE Dataset code in FAO metadata or the name of a csv file.
#' @param GET_MAPPINGCODE if NULL return data if char return other mapping files
#' @param DATA_FOLDER Path to the folder storing the data.
#'
#' @importFrom readr read_csv
#' @importFrom magrittr %>%
#' @importFrom assertthat assert_that
#' @export

FAOSTAT_load_raw_data <- function(DATASETCODE,
DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){
DATA_FOLDER = DIR_RAW_DATA_FAOSTAT,
GET_MAPPINGCODE = NULL){
assertthat::assert_that(is.character(DATASETCODE))
assertthat::assert_that(is.character(DATA_FOLDER))

my_env <- new.env()

metadata <- FAOSTAT_metadata()

# Loop through each code
for (CODE in DATASETCODE) {
if (is.null(GET_MAPPINGCODE)) {
# Loop through each code
for (CODE in DATASETCODE) {

metadata %>% filter(datasetcode == CODE) -> metadata1

zip_file_name <- file.path(DATA_FOLDER, basename(metadata1$filelocation))
assertthat::assert_that(file.exists(zip_file_name))
# assuming the csv in zip has the same base name
csv_file_name <- gsub(".zip$", ".csv", basename(zip_file_name))

df <- readr::read_csv(unz(zip_file_name, csv_file_name), col_types = NULL)
# Lower case col names and use _ as delimiter
names(df) <- tolower(gsub("\\.| ", "_", names(df)))
# Assigned to parent env
#assign(CODE, df, envir = parent.frame())
assign(CODE, df, envir = parent.env(environment()))
# Assigned to current env
#assign(CODE, df, envir = .GlobalEnv)
}
} else if(is.character(GET_MAPPINGCODE) == T){

metadata %>% filter(datasetcode == CODE) -> metadata1
for (CODE in DATASETCODE) {

zip_file_name <- file.path(DATA_FOLDER, basename(metadata1$filelocation))
assertthat::assert_that(file.exists(zip_file_name))
# assuming the csv in zip has the same base name
csv_file_name <- gsub(".zip$", ".csv", basename(zip_file_name))
metadata %>% filter(datasetcode == CODE) -> metadata1

df <- readr::read_csv(unz(zip_file_name, csv_file_name), col_types = NULL)
# Lower case col names and use _ as delimiter
names(df) <- tolower(gsub("\\.| ", "_", names(df)))
# Assigned to parent env
#assign(CODE, df, envir = parent.frame())
assign(CODE, df, envir = parent.env(environment()))
# Assigned to current env
#assign(CODE, df, envir = .GlobalEnv)
}
zip_file_name <- file.path(DATA_FOLDER, basename(metadata1$filelocation))
assertthat::assert_that(file.exists(zip_file_name))
# assuming the csv in zip has the same base name
csv_file_name <- gsub(".zip$", ".csv", basename(zip_file_name))

csv_file_name <- gsub("All_Data_\\(Normalized\\)", GET_MAPPINGCODE, csv_file_name)

df <- readr::read_csv(unz(zip_file_name, csv_file_name), col_types = NULL)
# Lower case col names and use _ as delimiter
names(df) <- tolower(gsub("\\.| ", "_", names(df)))
# Assigned to parent env
assign(paste0(CODE, "_", GET_MAPPINGCODE), df, envir = parent.env(environment()))
# Assigned to current env
#assign(paste0(CODE, "_", GET_MAPPINGCODE), df, envir = .GlobalEnv)
}

} else {stop("Wrong GET_MAPPINGCODE")}

}

Expand Down Expand Up @@ -518,6 +546,7 @@ output_csv_data <- function(gcam_dataset, col_type_nonyear,
#'
#' @return The same dataframe with balanced world export and import.


GROSS_TRADE_ADJUST <- function(.DF,
.MIN_TRADE_PROD_RATIO = 0.01,
.Reg_VAR = 'area_code',
Expand All @@ -533,10 +562,8 @@ GROSS_TRADE_ADJUST <- function(.DF,
# Join ExportScaler and ImportScaler
left_join(
.DF %>%
#group_by_at(vars(all_of(.GROUP_VAR), element)) %>%
#summarise(value = sum(value, na.rm = T), .groups = "drop") %>%
spread(element, value) %>%
group_by_at(vars(all_of(.GROUP_VAR))) %>%
dplyr::group_by_at(vars(dplyr::all_of(.GROUP_VAR))) %>%
# filter out items with zero world trade or production
# and replace na to zero later for scaler
replace_na(list(Export = 0, Import = 0, Production = 0)) %>%
Expand All @@ -550,9 +577,9 @@ GROSS_TRADE_ADJUST <- function(.DF,
# the trade scalers will be applied to all regions
mutate(ExportScaler = (sum(Export) + sum(Import))/ 2 / sum(Export),
ImportScaler = (sum(Export) + sum(Import))/ 2 / sum(Import)) %>%
select(all_of(c(.Reg_VAR, .GROUP_VAR)), ExportScaler, ImportScaler) %>%
select(dplyr::all_of(c(.Reg_VAR, .GROUP_VAR)), ExportScaler, ImportScaler) %>%
ungroup(),
by = c(all_of(c(.Reg_VAR, .GROUP_VAR)))) %>%
by = c(dplyr::all_of(c(.Reg_VAR, .GROUP_VAR)))) %>%
replace_na(list(ExportScaler = 0, ImportScaler = 0)) %>%
# If world export, import, or prod is 0, trade will be zero
mutate(value = case_when(
Expand Down
Loading

0 comments on commit 5e66145

Please sign in to comment.