Skip to content

Commit

Permalink
adding prebuilt data and update processing code
Browse files Browse the repository at this point in the history
  • Loading branch information
realxinzhao committed Aug 15, 2023
1 parent f0fe4f1 commit 55b9bf5
Show file tree
Hide file tree
Showing 12 changed files with 470 additions and 367 deletions.
69 changes: 67 additions & 2 deletions R/constants.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,75 @@
# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.

# gcamfaostat constants =================================================
# ***gcamfaostat constants ----

# Key parameters
# If TRUE, process raw FAO data
# If FALSE, use prebuilt data to load FAO data
Process_Raw_FAO_Data <- TRUE
Process_Raw_FAO_Data <- FALSE
# If TRUE, CSV will be generated and saved to DIR_OUTPUT_CSV
OUTPUT_Export_CSV <- TRUE


# Directories ----

## FAO raw data folder
DIR_RAW_DATA_FAOSTAT <- "inst/extdata/aglu/FAO/FAOSTAT"
## Output GCAM csv
DIR_OUTPUT_CSV <- "inst/extdata/aglu/FAO/temp"
dir.create(file.path(DIR_OUTPUT_CSV), showWarnings = FALSE)


# Historical years of focus ----
#*******************************************
FAOSTAT_Hist_Year <- seq(1970, 2020)
# Bilateral trade data start in 1986 but are of higher quality after 1992
#FAOSTAT_Hist_Year_Bilateral <- seq(1992, 2020)
FAOSTAT_Hist_Year_TMBilateral <- seq(2010, 2020)
FAOSTAT_Hist_Year_TCL <- seq(1973, 2019)
FAOSTAT_Hist_Year_FBSH <- seq(1973, 2013)
FAOSTAT_Hist_Year_FBS <- seq(2010, 2019) # New FBS years
MIN_HIST_PP_YEAR <- 2010 # first producer price year



# Balance elements ----

# used in Get_SUA_TEMPLATE and SUA_bal_adjust

c("Opening stocks", "Production", "Import",
"Export", "Processed", "Food", "Feed", "Seed", "Other uses", "Loss", "Closing stocks",
"Residuals", "Regional supply", "Regional demand", "Stock Variation") ->
Bal_element_new



# Data processing assumptions ----

# Forest trade data adjustment
# Export is adjusted when implied demand is negative, i.e., when
# Demand = Production + Import - Export < 0
# Adjust Export to Production * For_Export_Production_Ratio_Adj
For_Export_Production_Ratio_Adj <- 0.9

# Boundary used for correcting regional values with the world value in the conversion from mass to macro-nutrient
# Used in FAOSTAT_S1D_Food_Kcal.R
REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY <- 0.15
Hist_MEAN_Year_NUTRIENT_MASS_CONV <- 2010:2019 # average cal per g


# Other utils ----

# Label formatter for ggplot: renders numeric values with zero decimal places
scaleFUN <- function(x) {
  sprintf(fmt = "%.0f", x)
}





#*******************************************
#*******************************************


# ***Default constants in gcamdata ----


# General behavior constants ======================================================================
Expand Down
131 changes: 68 additions & 63 deletions R/xfaostat_L101_RawDataPreProc6_TM.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ module_xfaostat_L101_RawDataPreProc6_TM <- function(command, ...) {
"QCL_area_code_map")

MODULE_OUTPUTS <-
c("TM_bilateral") # Bilateral trade
c("TM_bilateral_wide") # Bilateral trade


if(command == driver.DECLARE_INPUTS) {
Expand All @@ -43,12 +43,15 @@ module_xfaostat_L101_RawDataPreProc6_TM <- function(command, ...) {
if(Process_Raw_FAO_Data == FALSE) {

# Prebuilt data is read here ----
TM_bilateral <- extract_prebuilt_data("TM_bilateral")
TM_bilateral_wide <- extract_prebuilt_data("TM_bilateral_wide")

} else {

# Get area code ----
QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull()

FAOSTAT_RDS <- c("TM_bilateral")

FAOSTAT_RDS <- c("TM_bilateral_wide")

DIR_PREBUILT_FAOSTAT <- "data/PREBUILT_FAOSTAT"

Expand All @@ -58,74 +61,76 @@ module_xfaostat_L101_RawDataPreProc6_TM <- function(command, ...) {
envir = parent.env(environment()))
})

TM_bilateral %>% filter(year >= min(FAOSTAT_Hist_Year_TMBilateral)) ->
TM_bilateral

#
# # *[TM] Bilateral trade ----
# #*FAO has better quality bilateral data since 1992, covering most SUA items
# FAOSTAT_load_raw_data("TM") # Bilateral trade
#
# TM %>%
# # Only keep quantities for elements with a unit of tonnes
# filter(element_code %in% c(5910, 5610),
# item_code < 1700,
# # Bilateral trade year starts from 1986 but higher quality after 1992
# # Subset data also to shrink the size
# year >= min(FAOSTAT_Hist_Year_TMBilateral),
# partner_country_code %in% QCL_area_code,
# reporter_country_code %in% QCL_area_code) %>%
# select(reporter_country_code, reporter_countries,
# partner_country_code, partner_countries,
# item_code, item, element_code, element, year, value, unit) ->
# TM1
# rm(TM)
#
#
# ## **Reconcile export and import bilateral flow ----
# # Full join export and import and use available import to fill missing and zero export
# TM1 %>% filter(element %in% c("Export Quantity")) %>% spread(element, value) %>%
# select(exporter = reporter_country_code,
# importer = partner_country_code, item_code, year, expflow = `Export Quantity`) %>%
# full_join(
# TM1 %>% filter(element %in% c("Import Quantity")) %>% spread(element, value)%>%
# select(importer = reporter_country_code,
# exporter = partner_country_code, item_code, year, impflow = `Import Quantity`),
# by = c("exporter", "importer", "item_code", "year")
# ) %>%
# # replace na with zero but use import to replace zero export later
# replace_na(list(expflow = 0, impflow = 0)) %>%
# transmute(area_code = importer, year, item_code, source_code = exporter,
# value = if_else(expflow == 0, impflow, expflow)) %>%
# mutate(element = "Import Quantity") ->
# TM2
#
#
# TM2 %>%
# # remove self-trade (per unaggregated area_code) which existed in FAO TM importing data and likely due to data processing mistakes.
# filter(area_code != source_code) %>%
# left_join(TM1 %>% distinct(item, item_code), by = c("item_code")) %>%
# left_join(TM1 %>% distinct(area = partner_countries, area_code = partner_country_code), by = c("area_code")) %>%
# left_join(TM1 %>% distinct(source = partner_countries, source_code = partner_country_code), by = c("source_code")) %>%
# rm_accent("item", "area", "source") %>%
# mutate(unit = "tonnes") ->
# TM3
# rm(TM1, TM2)
#
# TM3 %>% mutate(value = value / 1000, unit = "1000 tons") -> TM_bilateral
#

# # *[TM] Bilateral trade ----
# #*FAO has better quality bilateral data since 1992, covering most SUA items
# FAOSTAT_load_raw_data("TM") # Bilateral trade
#
# TM %>%
# # Only keep quantities for elements with a unit of tonnes
# filter(element_code %in% c(5910, 5610),
# item_code < 1700,
# # Bilateral trade year starts from 1986 but higher quality after 1992
# # Subset data also to shrink the size
# year >= min(FAOSTAT_Hist_Year_TMBilateral),
# partner_country_code %in% QCL_area_code,
# reporter_country_code %in% QCL_area_code) %>%
# select(reporter_country_code, reporter_countries,
# partner_country_code, partner_countries,
# item_code, item, element_code, element, year, value, unit) ->
# TM1
# rm(TM)
#
#
# ## **Reconcile export and import bilateral flow ----
# # Full join export and import and use available import to fill missing and zero export
# TM1 %>% filter(element %in% c("Export Quantity")) %>% spread(element, value) %>%
# select(exporter = reporter_country_code,
# importer = partner_country_code, item_code, year, expflow = `Export Quantity`) %>%
# full_join(
# TM1 %>% filter(element %in% c("Import Quantity")) %>% spread(element, value)%>%
# select(importer = reporter_country_code,
# exporter = partner_country_code, item_code, year, impflow = `Import Quantity`),
# by = c("exporter", "importer", "item_code", "year")
# ) %>%
# # replace na with zero but use import to replace zero export later
# replace_na(list(expflow = 0, impflow = 0)) %>%
# transmute(area_code = importer, year, item_code, source_code = exporter,
# value = if_else(expflow == 0, impflow, expflow)) %>%
# mutate(element = "Import Quantity") ->
# TM2
#
#
# TM2 %>%
# # remove self-trade (per unaggregated area_code) which existed in FAO TM importing data and likely due to data processing mistakes.
# filter(area_code != source_code) %>%
# left_join(TM1 %>% distinct(item, item_code), by = c("item_code")) %>%
# left_join(TM1 %>% distinct(area = partner_countries, area_code = partner_country_code), by = c("area_code")) %>%
# left_join(TM1 %>% distinct(source = partner_countries, source_code = partner_country_code), by = c("source_code")) %>%
# rm_accent("item", "area", "source") %>%
# mutate(unit = "tonnes") ->
# TM3
# rm(TM1, TM2)
#
#
# TM3 %>% filter(value > 0) %>% spread(year, value) ->
# TM_bilateral_wide
#
# # size has been a key concern here
# utils:::format.object_size(object.size(TM_bilateral_wide), "auto")


### output TM ----

TM_bilateral %>%
add_title("FAO TM") %>%
TM_bilateral_wide %>%
add_title("FAO bilateral trade (TM) ") %>%
add_units("tonne") %>%
add_comments("Preprocessed FAO TM_wide") %>%
add_precursors("aglu/FAO/FAOSTAT/Trade_DetailedTradeMatrix_E_All_Data_(Normalized)_PalceHolder",
"QCL_area_code_map") ->
TM_bilateral
TM_bilateral_wide

verify_identical_prebuilt(TM_bilateral)
verify_identical_prebuilt(TM_bilateral_wide)

}

Expand Down
2 changes: 2 additions & 0 deletions R/xfaostat_L101_RawDataPreProc7_FO.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ module_xfaostat_L101_RawDataPreProc7_FO <- function(command, ...) {
rm_accent("item", "area") ->
FO_Roundwood


### output FO ----
FO_Roundwood %>%
add_title("FAO forestry data") %>%
Expand All @@ -73,6 +74,7 @@ module_xfaostat_L101_RawDataPreProc7_FO <- function(command, ...) {
add_precursors("aglu/FAO/FAOSTAT/Forestry_E_All_Data_(Normalized)_PalceHolder") ->
FO_Roundwood


verify_identical_prebuilt(FO_Roundwood)

}
Expand Down
18 changes: 17 additions & 1 deletion R/xfaostat_L101_RawDataPreProc8_RL_RFN.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,16 @@ module_xfaostat_L101_RawDataPreProc8_RL_RFN <- function(command, ...) {

FAOSTAT_load_raw_data(DATASETCODE = "RL", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)

RL %>%
filter(year %in% FAOSTAT_Hist_Year,
area_code < 350, # Rm aggregated area
item_code %in% c(6621, 6630, 6640)) -> # Keep Arable land, Temporary crops, Fallow land
RL


### output RL ----
RL %>%
add_title("FAO land data", overwrite = T) %>%
add_title("FAO land data") %>%
add_units("ha") %>%
add_comments("FAO raw land data") %>%
add_precursors("aglu/FAO/FAOSTAT/Inputs_LandUse_E_All_Data_(Normalized)_PalceHolder") ->
Expand All @@ -64,8 +71,17 @@ module_xfaostat_L101_RawDataPreProc8_RL_RFN <- function(command, ...) {
verify_identical_prebuilt(RL)



# RFN ----
FAOSTAT_load_raw_data(DATASETCODE = "RFN", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)

RFN %>%
filter(year %in% FAOSTAT_Hist_Year,
element_code %in% c(5510, 5157), # Prod and Ag use
area_code < 350, # Rm aggregated area
item_code %in% c(3102)) -> # Nutrient nitrogen N (total)
RFN

### output RFN ----
RFN %>%
add_title("FAO fertilizer data") %>%
Expand Down
15 changes: 9 additions & 6 deletions R/xfaostat_L105_DataConnectionToSUA.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
"QCL_PROD",
"QCL_AN_LIVEANIMAL_MEATEQ",
"TCL_wide",
"TM_bilateral",
"TM_bilateral_wide",
"FBSH_CB_wide",
"FBS_wide",
"SCL_wide")
Expand All @@ -46,7 +46,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)


# Wide to long
# Wide to long ----

SCL_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
Expand All @@ -64,8 +64,11 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
FAOSTAT_AREA_RM_NONEXIST() -> TCL

TM_bilateral_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
filter(value > 0) -> TM_bilateral

rm(SCL_wide, FBS_wide, FBSH_CB_wide, TCL_wide)
rm(SCL_wide, FBS_wide, FBSH_CB_wide, TCL_wide, TM_bilateral_wide)


# Get area code in QCL that is consistent with FBS e.g., after 2010 only
Expand Down Expand Up @@ -373,7 +376,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {

# 3. Process items in FAO_items to get Balanced SUA data ----
## 3.1 Bal_new_tier1 ----
# Tier1 includes 169 items with best sources e.g. bilateral trade (TM) prodstat (QCL) and supply-utilization-account (SCL)
# Tier1 includes 168 items with best sources e.g. bilateral trade (TM) prodstat (QCL) and supply-utilization-account (SCL)
# SCL has balanced data processed by FAO but the quality was poor with low consistency

Get_SUA_TEMPLATE(.ITEM_CODE = FAO_items %>% filter(tier == 1) %>% pull(item_code)) %>%
Expand Down Expand Up @@ -411,7 +414,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {


## 3.3 Bal_new_tier3 ----
# Tier3 includes 60 items that had QCL but no bilateral trade data
# Tier3 includes 61 items that had QCL but no bilateral trade data
# so use gross trade from SCL

Get_SUA_TEMPLATE(.ITEM_CODE = FAO_items %>% filter(tier == 3) %>% pull(item_code)) %>%
Expand Down Expand Up @@ -733,7 +736,7 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
"QCL_PROD",
"QCL_AN_LIVEANIMAL_MEATEQ",
"TCL_wide",
"TM_bilateral",
"TM_bilateral_wide",
"FBSH_CB_wide",
"FBS_wide",
"SCL_wide")->
Expand Down
6 changes: 5 additions & 1 deletion R/xfaostat_L199_CSVExportAgSUA.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ module_xfaostat_L199_ExportCSV <- function(command, ...) {
"QCL_CROP_PRIMARY",
"QCL_FODDERCROP",
"QCL_PRIMARY_PROD_PV",
"TM_bilateral",
"TM_bilateral_wide",
"PD",
"SUA_food_macronutrient_rate")

Expand Down Expand Up @@ -63,6 +63,10 @@ module_xfaostat_L199_ExportCSV <- function(command, ...) {
# Bilateral trade ----
## *GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020 ----

TM_bilateral_wide %>% gather_years() %>%
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
filter(value > 0) -> TM_bilateral

TM_bilateral %>%
# only export quality data years
filter(year >= min(FAOSTAT_Hist_Year_FBS)) %>%
Expand Down
5 changes: 1 addition & 4 deletions R/xfaostat_L301_LandCover.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,7 @@ module_xfaostat_L301_LandCover <- function(command, ...) {

## Preprocess and quick clean ----
# Only keep Arable land
RL %>% filter(year %in% FAOSTAT_Hist_Year,
area_code < 350,
# Rm aggregated area
item_code %in% c(6621, 6630, 6640)) %>% # Arable land, Temporary crops, Fallow land
RL %>%
select(area_code,
area,
item_code,
Expand Down
Loading

0 comments on commit 55b9bf5

Please sign in to comment.