diff --git a/NAMESPACE b/NAMESPACE index 5e90f46a..65e8fb96 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,11 @@ # Generated by roxygen2: do not edit by hand -export(FAOSTAT_check_count_plot) export(FAOSTAT_load_raw_data) export(FAOSTAT_metadata) export(FAO_AREA_RM_NONEXIST) export(FF_FILL_NUMERATOR_DENOMINATOR) +export(FF_check_count_plot) +export(FF_join_checkmap) export(FF_summary) export(Moving_average) export(approx_fun) @@ -39,6 +40,8 @@ importFrom(XML,xmlParse) importFrom(XML,xmlToDataFrame) importFrom(assertthat,assert_that) importFrom(data.table,data.table) +importFrom(dplyr,all_of) +importFrom(dplyr,any_of) importFrom(dplyr,any_vars) importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) @@ -59,6 +62,7 @@ importFrom(dplyr,mutate_at) importFrom(dplyr,n) importFrom(dplyr,pull) importFrom(dplyr,rename) +importFrom(dplyr,rename_at) importFrom(dplyr,right_join) importFrom(dplyr,select) importFrom(dplyr,summarise) @@ -78,6 +82,7 @@ importFrom(graphics,title) importFrom(magrittr,"%$%") importFrom(magrittr,"%>%") importFrom(methods,is) +importFrom(purrr,reduce) importFrom(readr,read_csv) importFrom(stats,aggregate) importFrom(tibble,as_tibble) diff --git a/R/xfaostat_L100_constants.R b/R/xfaostat_L100_constants.R index 33bb73c8..21a43d3f 100644 --- a/R/xfaostat_L100_constants.R +++ b/R/xfaostat_L100_constants.R @@ -1,6 +1,15 @@ -# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. 
-# General behavior constants ====================================================================== + + # Directories ---- + + #DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata") + + DIR_RAW_DATA_FAOSTAT <- "inst/extdata/aglu/FAO/FAOSTAT" + OUTPUT_Export_CSV = T + # Output GCAM csv + DIR_OUTPUT_CSV <- "inst/extdata/aglu/FAO/temp" + dir.create(file.path(DIR_OUTPUT_CSV), showWarnings = FALSE) + # Historical years of focus ---- @@ -12,19 +21,12 @@ Hist_Year_FBS <- seq(2010, 2019) # New FBS years MIN_HIST_PP_YEAR = 2010 # first producer price year -# having issues with package check here -# comment this line out when building package - #DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata") - DIR_RAW_DATA_FAOSTAT <- "inst/extdata/aglu/FAO/FAOSTAT" - OUTPUT_Export_CSV = T - # Output GCAM csv - DIR_OUTPUT_CSV <- "inst/extdata/aglu/FAO/temp" - dir.create(file.path(DIR_OUTPUT_CSV), showWarnings = FALSE) - - # Balance elements; used in Get_SUA_TEMPLATE and SUA_bal_adjust + # Balance elements ---- + #******************************************* + # used in Get_SUA_TEMPLATE and SUA_bal_adjust c("Opening stocks", "Production", "Import", "Export", "Processed", "Food", "Feed", "Seed", "Other uses", "Loss", "Closing stocks", @@ -44,3 +46,11 @@ REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY <- 0.15 Hist_MEAN_Year_NUTRIENT_MASS_CONV <- 2010:2019 # average cal per g + + + # Other utils ---- + #******************************************* + # decimal places in ggplot + scaleFUN <- function(x) sprintf("%.0f", x) + + #******************************************* diff --git a/R/xfaostat_L105_DataConnectionToSUA.R b/R/xfaostat_L105_DataConnectionToSUA.R index 902d3173..88e4c753 100644 --- a/R/xfaostat_L105_DataConnectionToSUA.R +++ b/R/xfaostat_L105_DataConnectionToSUA.R @@ -8,7 +8,7 @@ #' @param ... 
other optional parameters, depending on command #' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if #' \code{command} is "MAKE") all the generated outputs -#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities. +#' @details This chunk compiles balanced supply utilization data by connecting FAO datasets. #' @importFrom assertthat assert_that #' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at #' first case_when vars transmute @@ -347,60 +347,9 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) { ## 2.3. FN: Balance gross trade ---- - # Fn adjusting gross trade in all regions to be consistent with average (world export and import) - - #' Balance gross trade - #' @description Scale gross export and import in all regions to make them equal at the world level. - #' @param .DF An input dataframe with an element col including Import and Export - #' @param .MIN_TRADE_PROD_RATIO Trade will be removed if world total export or import over production is smaller than .MIN_TRADE_PROD_RATIO (1% default value) - #' @param .Reg_VAR Region variable name; default is ("area_code") - #' @param .GROUP_VAR Group variable; default is ("item_code", "year") - #' @return The same dataframe with balanced world export and import. 
- - GROSS_TRADE_ADJUST <- function(.DF, - .MIN_TRADE_PROD_RATIO = 0.01, - .Reg_VAR = 'area_code', - .GROUP_VAR = c("item_code", "year")){ - - # assert .DF structure - assertthat::assert_that(all(c("element", .GROUP_VAR) %in% names(.DF))) - assertthat::assert_that(dplyr::is.grouped_df(.DF) == F) - assertthat::assert_that(all(c("Import", "Export", "Production") %in% - c(.DF %>% distinct(element) %>% pull))) - - .DF %>% - # Join ExportScaler and ImportScaler - left_join( - .DF %>% - #group_by_at(vars(all_of(.GROUP_VAR), element)) %>% - #summarise(value = sum(value, na.rm = T), .groups = "drop") %>% - spread(element, value) %>% - group_by_at(vars(all_of(.GROUP_VAR))) %>% - # filter out items with zero world trade or production - # and replace na to zero later for scaler - replace_na(list(Export = 0, Import = 0, Production = 0)) %>% - filter(sum(Export) != 0, sum(Import) != 0, sum(Production) != 0) %>% - # world trade should be later than .MIN_TRADE_PROD_RATIO to have meaningful data - # depending on item group, .MIN_TRADE_PROD_RATIO can be set differently - filter(sum(Export) / sum(Production) > .MIN_TRADE_PROD_RATIO) %>% - filter(sum(Import) / sum(Production) > .MIN_TRADE_PROD_RATIO) %>% - # finally, - # use average gross trade value to calculate trade scaler - # the trade scalers will be applied to all regions - mutate(ExportScaler = (sum(Export) + sum(Import))/ 2 / sum(Export), - ImportScaler = (sum(Export) + sum(Import))/ 2 / sum(Import)) %>% - select(all_of(c(.Reg_VAR, .GROUP_VAR)), ExportScaler, ImportScaler) %>% - ungroup(), - by = c(all_of(c(.Reg_VAR, .GROUP_VAR)))) %>% - replace_na(list(ExportScaler = 0, ImportScaler = 0)) %>% - # If world export, import, or prod is 0, trade will be zero - mutate(value = case_when( - element %in% c("Export") ~ value * ExportScaler, - element %in% c("Import") ~ value * ImportScaler, - TRUE ~ value)) %>% - select(-ExportScaler, -ImportScaler) - } + #GROSS_TRADE_ADJUST function moved to helper functions + # 3. 
Process items in FAO_items to get Balanced SUA data ---- diff --git a/R/xfaostat_L201_Forestry.R b/R/xfaostat_L201_Forestry.R index 17c02967..60ff2f80 100644 --- a/R/xfaostat_L201_Forestry.R +++ b/R/xfaostat_L201_Forestry.R @@ -60,7 +60,6 @@ module_xfaostat_L201_Forestry <- function(command, ...) { unit) %>% filter(!is.na(value)) %>% rm_accent("item", "area") -> FO1 - rm(FO) # 215 unique areas with production data FO_area <- diff --git a/R/xfaostat_L401_Fertilizer.R b/R/xfaostat_L401_Fertilizer.R index d75a8c77..ac7c3f02 100644 --- a/R/xfaostat_L401_Fertilizer.R +++ b/R/xfaostat_L401_Fertilizer.R @@ -15,7 +15,7 @@ #' @importFrom tibble tibble #' @importFrom tidyr complete drop_na gather nesting spread replace_na fill #' @author XZ 2023 -module_xfaostatL401_Fertilizer <- function(command, ...) { +module_xfaostat_L401_Fertilizer <- function(command, ...) { MODULE_INPUTS <- c(FILE = "aglu/FAO/FAO_an_items_PRODSTAT", @@ -59,7 +59,6 @@ module_xfaostatL401_Fertilizer <- function(command, ...) { complete(nesting(area_code, area), nesting(item_code, item), nesting(element_code, element, unit), year) %>% rm_accent("item", "area") -> RFN1 - rm(RFN) # clean ## RFN1_Production ---- RFN1 %>% diff --git a/R/xfaostat_helper_funcs.R b/R/xfaostat_helper_funcs.R index 7346ac63..329a9b28 100644 --- a/R/xfaostat_helper_funcs.R +++ b/R/xfaostat_helper_funcs.R @@ -27,22 +27,6 @@ FAOSTAT_metadata <- function (code = NULL){ return(metadata) } -FAOSTAT_download_bulk <- function(DATASETCODE, - DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){ - - assertthat::assert_that(is.character(DATASETCODE)) - assertthat::assert_that(is.character(DATA_FOLDER)) - - - lapply(DATASETCODE, function(d){ - metadata <- FAOSTAT_metadata(code = d) - url_bulk = metadata$filelocation - - file_name <- basename(url_bulk) - download.file(url_bulk, file.path(DATA_FOLDER, file_name)) - }) - -} @@ -87,34 +71,6 @@ FAOSTAT_load_raw_data <- function(DATASETCODE, } -#remove accent and apostrophe for cols in a df -#' rm_accent: Remove 
accent and prime in selected columns of a data frame -#' -#' @param .df Input data frame -#' @param ... A character set of column names -#' @importFrom magrittr %>% -#' @importFrom assertthat assert_that -#' @importFrom dplyr intersect mutate_at -#' -#' @return A data frame with accent and prime removed -#' @export - -rm_accent <- function(.df, ...){ - - assertthat::assert_that( - length(intersect(c(...), names(.df))) == length(c(...)), - msg = "Columns listed not included in the data frame") - - # .df %>% - # mutate_at(c(...), iconv, to = 'ASCII//TRANSLIT') %>% - # mutate_at(c(...), .funs = gsub, pattern = "\\'", replacement = "") - - .df %>% - mutate(dplyr::across(c(...), iconv, to = 'ASCII//TRANSLIT')) %>% - mutate(dplyr::across(c(...), gsub, pattern = "\\'", replacement = "")) - -} - @@ -313,7 +269,16 @@ FF_FILL_NUMERATOR_DENOMINATOR <- function(.DF, NUMERATOR_c, DENOMINATOR_c, } -# A function to full join data frame to check mappings with common code +#' FF_join_checkmap: full-join data frames by a common COL_by variable to checking mapping +#' +#' @param DFs Data frames to be full joined. +#' @param COL_by By variable in join. +#' @param COL_rename Other common variables to rename (by adding df names as prefix) before the join. +#' @importFrom magrittr %>% +#' @importFrom dplyr rename_at select any_of all_of full_join +#' @importFrom purrr reduce +#' @return A joined data frame +#' @export FF_join_checkmap <- function(DFs, COL_by, COL_rename){ lapply(DFs, function(df){ @@ -323,7 +288,17 @@ FF_join_checkmap <- function(DFs, COL_by, COL_rename){ }) %>% purrr:: reduce(full_join, by = COL_by) } -# Count item_code and area_code by year +#' FF_check_count_plot: count item_code and area_code by year +#' +#' @param .DF Input data frame +#' @param .ELEMENT A set of elements (in Char) to focus. 
If empty, all elements are summarized +#' @importFrom dplyr summarize +#' @importFrom magrittr %>% +#' @importFrom tidyr gather +#' @importFrom ggplot2 ggplot aes facet_wrap geom_line theme_bw +#' +#' @return A plot summarizing the time-series of changing the count of item_code and area_code (grouped by element). +#' @export FF_check_count_plot <- function(.DF, .ELEMENT = c()){ if (.ELEMENT %>% length() == 0 ) { .DF %>% distinct(element) %>% pull -> .ELEMENT @@ -338,6 +313,34 @@ FF_check_count_plot <- function(.DF, .ELEMENT = c()){ theme_bw() } +#remove accent and apostrophe for cols in a df +#' rm_accent: Remove accent and prime in selected columns of a data frame +#' +#' @param .df Input data frame +#' @param ... A character set of column names +#' @importFrom magrittr %>% +#' @importFrom assertthat assert_that +#' @importFrom dplyr intersect mutate_at +#' +#' @return A data frame with accent and prime removed +#' @export + +rm_accent <- function(.df, ...){ + + assertthat::assert_that( + length(intersect(c(...), names(.df))) == length(c(...)), + msg = "Columns listed not included in the data frame") + + # .df %>% + # mutate_at(c(...), iconv, to = 'ASCII//TRANSLIT') %>% + # mutate_at(c(...), .funs = gsub, pattern = "\\'", replacement = "") + + .df %>% + mutate(dplyr::across(c(...), iconv, to = 'ASCII//TRANSLIT')) %>% + mutate(dplyr::across(c(...), gsub, pattern = "\\'", replacement = "")) + +} + assert_FBS_balance <- function(.DF){ @@ -441,33 +444,6 @@ SUA_bal_adjust <- function(.df){ -#' FAOSTAT_check_count_plot: count item_code and area_code by year -#' -#' @param .DF Input data frame -#' @param .ELEMENT A set of elements (in Char) to focus. If empty, all elements are summarized -#' @importFrom dplyr summarize -#' @importFrom magrittr %>% -#' @importFrom tidyr gather -#' @importFrom ggplot2 ggplot aes facet_wrap geom_line theme_bw -#' -#' @return A plot summarizing the time-series of changing the count of item_code and area_code (grouped by element). 
-#' @export - -FAOSTAT_check_count_plot <- function(.DF, .ELEMENT = c()){ - if (.ELEMENT %>% length() == 0 ) { - .DF %>% distinct(element) %>% pull -> .ELEMENT - } - .DF %>% group_by(year, element) %>% - summarise(Country = length(unique(area_code)), - Item = length(unique(item_code)), .groups = "drop") %>% - gather(header, count, -year, -element) %>% - filter(element %in% .ELEMENT) %>% - ggplot() + facet_wrap(~header, scales = "free") + - geom_line(aes(x = year, y = count, color = element)) + - theme_bw() -} - - #' Function saving dataset to csv file with headers #' @@ -520,10 +496,12 @@ output_csv_data <- function(gcam_dataset, col_type_nonyear, #' Balance gross trade #' @description Scale gross export and import in all regions to make them equal at the world level. +#' #' @param .DF An input dataframe with an element col including Import and Export #' @param .MIN_TRADE_PROD_RATIO Trade will be removed if world total export or import over production is smaller than .MIN_TRADE_PROD_RATIO (1% default value) -#' @param .Reg_VAR Region variable name; default is ("area_code") -#' @param .GROUP_VAR Group variable; default is ("item_code", "year") +#' @param .Reg_VAR Region variable name; default is area_code +#' @param .GROUP_VAR Group variable; default is item_code and year +#' #' @return The same dataframe with balanced world export and import. 
GROSS_TRADE_ADJUST <- function(.DF, @@ -572,6 +550,24 @@ GROSS_TRADE_ADJUST <- function(.DF, } +FAOSTAT_download_bulk <- function(DATASETCODE, + DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){ + + assertthat::assert_that(is.character(DATASETCODE)) + assertthat::assert_that(is.character(DATA_FOLDER)) + + + lapply(DATASETCODE, function(d){ + metadata <- FAOSTAT_metadata(code = d) + url_bulk = metadata$filelocation + + file_name <- basename(url_bulk) + download.file(url_bulk, file.path(DATA_FOLDER, file_name)) + }) + +} + + # decimal places in ggplot scaleFUN <- function(x) sprintf("%.0f", x) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 987dbec8..1f3ee81e 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -2,5 +2,5 @@ pandoc: 3.1.1 pkgdown: 2.0.7 pkgdown_sha: ~ articles: {} -last_built: 2023-08-08T17:50Z +last_built: 2023-08-08T19:05Z diff --git a/docs/reference/FAOSTAT_metadata.html b/docs/reference/FAOSTAT_metadata.html index a839f89e..482e4d09 100644 --- a/docs/reference/FAOSTAT_metadata.html +++ b/docs/reference/FAOSTAT_metadata.html @@ -63,7 +63,20 @@
FAOSTAT_metadata
-#> Error in eval(expr, envir, enclos): object 'FAOSTAT_metadata' not found
+#> function (code = NULL)
+#> {
+#> FAOxml <- XML::xmlParse(xml2::read_xml("http://fenixservices.fao.org/faostat/static/bulkdownloads/datasets_E.xml"))
+#> metadata <- XML::xmlToDataFrame(FAOxml, stringsAsFactors = FALSE)
+#> names(metadata) <- tolower(gsub("\\.", "_", names(metadata)))
+#> metadata["CB" == metadata[, "datasetcode"], "filelocation"] <- "https://fenixservices.fao.org/faostat/static/bulkdownloads/CommodityBalances_(non-food)_E_All_Data_(Normalized).zip"
+#> if (!is.null(code)) {
+#> metadata <- metadata[code == metadata[, "datasetcode"],
+#> ]
+#> }
+#> return(metadata)
+#> }
+#> <bytecode: 0x000000004680b240>
+#> <environment: namespace:gcamdata>
FF_check_count_plot.Rd
FF_check_count_plot: count item_code and area_code by year
+FF_check_count_plot(.DF, .ELEMENT = c())
Input data frame
A set of elements (in Char) to focus. If empty, all elements are summarized
A plot summarizing the time-series of changing the count of item_code and area_code (grouped by element).
+FF_join_checkmap.Rd
FF_join_checkmap: full-join data frames by a common COL_by variable to check mappings
+FF_join_checkmap(DFs, COL_by, COL_rename)
Data frames to be full joined.
By variable in join.
Other common variables to rename (by adding df names as prefix) before the join.
A joined data frame
+Trade will be removed if world total export or import over production is smaller than .MIN_TRADE_PROD_RATIO (1% default value)
-.Reg_VARRegion variable name; default is ("area_code")
-.GROUP_VARGroup variable; default is ("item_code", "year")
.Reg_VARRegion variable name; default is area_code
+.GROUP_VARGroup variable; default is item_code and year
The same dataframe with balanced world export and import.
diff --git a/docs/reference/index.html b/docs/reference/index.html index 393cded7..f964a517 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -174,10 +174,6 @@extract_prebuilt_data()
extract_prebuilt_data
FAOSTAT_check_count_plot: count item_code and area_code by year
fast_left_join()
Fast left join for large tables
FF_check_count_plot: count item_code and area_code by year
FF_FILL_NUMERATOR_DENOMINATOR Fill in missing values considering relationship between two variables
FF_join_checkmap: full-join data frames by a common COL_by variable to check mappings
module_aglu_L100.FAO_SUA_PrimaryEquivalent()
module_aglu_L100.FAO_SUA_PrimaryEquivalent
module_xfaostat_L401_Fertilizer
module_xfaostat_L301_LandCover()
module_xfaostat_L301_LandCover
module_xfaostat_L401_Fertilizer
Connect datasets to build SUA
other optional parameters, depending on command
An input dataframe with an element col including Import and Export
Trade will be removed if world total export or import over production is smaller than .MIN_TRADE_PROD_RATIO (1% default value)
-.Reg_VARRegion variable name; default is ("area_code")
-.GROUP_VARGroup variable; default is ("item_code", "year")
Depends on command
: either a vector of required inputs, a vector of output names, or (if
- command
is "MAKE") all the generated outputsThe same dataframe with balanced world export and import.
Scale gross export and import in all regions to make them equal at the world level.
- - -Connect datasets to build SUAThis chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities.
- - -XZ 2023 -Balance gross trade
- - +Depends on command
: either a vector of required inputs, a vector of output names, or (if
command
is "MAKE") all the generated outputs
This chunk compiles balanced supply utilization data by connecting FAO datasets.
+module_xfaostat_L401_Fertilizer.Rd
process FAOSTAT fertilizer data (RFN) into gcamdata inputs
+module_xfaostat_L401_Fertilizer(command, ...)
API command to execute
other optional parameters, depending on command
Depends on command
: either a vector of required inputs, a vector of output names, or (if
command
is "MAKE") all the generated outputs
This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities.
+IF TRUE