Skip to content

Commit

Permalink
new parameter merge_households in #31
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed May 5, 2024
1 parent d86ee81 commit 19b0799
Show file tree
Hide file tree
Showing 27 changed files with 196 additions and 47 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
.RData
^_pkgdown\.yml$
^pkgdown$
/data_prep/*
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# censobr v0.3.29999 dev

* Minor changes
* The functions `read_population`, `read_mortality`, `read_families` and `read_emigration` now include a new parameter `merge_households` (logical) to indicate whether the function should merge household variables into the output data. Closes [#31](https://github.com/ipeaGIT/censobr/issues/31)

# censobr v0.3.2

* Minor changes
Expand Down
6 changes: 3 additions & 3 deletions tests/tests_rafa/merge_household.R → R/merge_household.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ merge_household_var <- function(df,
if (year == 2000) {
key_vars <- c('code_muni', 'code_state', 'abbrev_state','name_state',
'code_region', 'name_region', 'code_weighting', 'V0300')
}
}

if (year == 2010) {
key_vars <- c('code_muni', 'code_state', 'abbrev_state','name_state',
Expand All @@ -64,5 +64,5 @@ merge_household_var <- function(df,
# merge
temp_df <- dplyr::left_join(df, df_household)

return(temp_df)
}
return(temp_df)
}
11 changes: 11 additions & 0 deletions R/read_emigration.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#' @template year
#' @template columns
#' @template add_labels
#' @template merge_households
#' @template as_data_frame
#' @template showProgress
#' @template cache
Expand All @@ -29,6 +30,7 @@
read_emigration <- function(year = 2010,
columns = NULL,
add_labels = NULL,
merge_households = FALSE,
as_data_frame = FALSE,
showProgress = TRUE,
cache = TRUE){
Expand All @@ -37,6 +39,7 @@ read_emigration <- function(year = 2010,
checkmate::assert_numeric(year)
checkmate::assert_vector(columns, null.ok = TRUE)
checkmate::assert_logical(as_data_frame)
checkmate::assert_logical(merge_households)
checkmate::assert_string(add_labels, pattern = 'pt', null.ok = TRUE)

# data available for the years:
Expand All @@ -61,6 +64,14 @@ read_emigration <- function(year = 2010,
### read data
df <- arrow_open_dataset(local_file)

### merge household data
if (isTRUE(merge_households)) {
df <- merge_household_var(df,
year = year,
add_labels = add_labels,
showProgress)
}

### Select
if (!is.null(columns)) { # columns <- c('V0002','V0011')
df <- dplyr::select(df, dplyr::all_of(columns))
Expand Down
11 changes: 11 additions & 0 deletions R/read_families.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#' @param year Numeric. Year of reference in the format `yyyy`. Defaults to `2000`.
#' @template columns
#' @template add_labels
#' @template merge_households
#' @template as_data_frame
#' @template showProgress
#' @template cache
Expand All @@ -23,6 +24,7 @@
read_families <- function(year = 2000,
columns = NULL,
add_labels = NULL,
merge_households = FALSE,
as_data_frame = FALSE,
showProgress = TRUE,
cache = TRUE){
Expand All @@ -31,6 +33,7 @@ read_families <- function(year = 2000,
checkmate::assert_numeric(year)
checkmate::assert_vector(columns, null.ok = TRUE)
checkmate::assert_logical(as_data_frame)
checkmate::assert_logical(merge_households)
checkmate::assert_string(add_labels, pattern = 'pt', null.ok = TRUE)

# data available for the years:
Expand All @@ -55,6 +58,14 @@ read_families <- function(year = 2000,
### read data
df <- arrow_open_dataset(local_file)

### merge household data
if (isTRUE(merge_households)) {
df <- merge_household_var(df,
year = year,
add_labels = add_labels,
showProgress)
}

### Select
if (!is.null(columns)) { # columns <- c('V0002','V0011')
df <- dplyr::select(df, dplyr::all_of(columns))
Expand Down
11 changes: 11 additions & 0 deletions R/read_mortality.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#' @template year
#' @template columns
#' @template add_labels
#' @template merge_households
#' @template as_data_frame
#' @template showProgress
#' @template cache
Expand All @@ -33,6 +34,7 @@
read_mortality <- function(year = 2010,
columns = NULL,
add_labels = NULL,
merge_households = FALSE,
as_data_frame = FALSE,
showProgress = TRUE,
cache = TRUE){
Expand All @@ -41,6 +43,7 @@ read_mortality <- function(year = 2010,
checkmate::assert_numeric(year)
checkmate::assert_vector(columns, null.ok = TRUE)
checkmate::assert_logical(as_data_frame)
checkmate::assert_logical(merge_households)
checkmate::assert_string(add_labels, pattern = 'pt', null.ok = TRUE)

# available for the years:
Expand All @@ -66,6 +69,14 @@ read_mortality <- function(year = 2010,
### read data
df <- arrow_open_dataset(local_file)

### merge household data
if (isTRUE(merge_households)) {
df <- merge_household_var(df,
year = year,
add_labels = add_labels,
showProgress)
}

### Select
if (!is.null(columns)) { # columns <- c('V0002','V0011')
df <- dplyr::select(df, dplyr::all_of(columns))
Expand Down
11 changes: 11 additions & 0 deletions R/read_population.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#' @template year
#' @template columns
#' @template add_labels
#' @template merge_households
#' @template as_data_frame
#' @template showProgress
#' @template cache
Expand All @@ -23,6 +24,7 @@
read_population <- function(year = 2010,
columns = NULL,
add_labels = NULL,
merge_households = FALSE,
as_data_frame = FALSE,
showProgress = TRUE,
cache = TRUE){
Expand All @@ -31,6 +33,7 @@ read_population <- function(year = 2010,
checkmate::assert_numeric(year)
checkmate::assert_vector(columns, null.ok = TRUE)
checkmate::assert_logical(as_data_frame)
checkmate::assert_logical(merge_households)
checkmate::assert_string(add_labels, pattern = 'pt', null.ok = TRUE)

# data available for the years:
Expand All @@ -56,6 +59,14 @@ read_population <- function(year = 2010,
### read data
df <- arrow_open_dataset(local_file)

### merge household data
if (isTRUE(merge_households)) {
df <- merge_household_var(df,
year = year,
add_labels = add_labels,
showProgress)
}

### Select
if (!is.null(columns)) { # columns <- c('V0002','V0011')
df <- dplyr::select(df, dplyr::all_of(columns))
Expand Down
2 changes: 2 additions & 0 deletions R/read_tracts.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#' @export
#' @family Census tract data
#' @examplesIf identical(tolower(Sys.getenv("NOT_CRAN")), "true")
#' library(censobr)
#'
#' # return data as arrow Dataset
#' df <- read_tracts(year = 2010,
#' dataset = 'PessoaRenda',
Expand Down
29 changes: 29 additions & 0 deletions man/merge_household_var.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/read_emigration.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/read_families.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/read_mortality.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/read_population.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/read_tracts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/roxygen/templates/merge_households.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#' @param merge_households Logical. Indicates whether the function should merge
#' household variables into the output data. Defaults to `FALSE`.

10 changes: 0 additions & 10 deletions tests/tests_rafa/merge_household_notes.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,6 @@ df <- censobr::read_population(year = year,
# censobr::data_dictionary(year = year, dataset = 'households')


NA FUNCAO
### merge household vars
if (isTRUE(merge_households)) {
df <- merge_household_var(df,
year = year,
add_labels = add_labels,
showProgress = showProgress)
}



Expand Down Expand Up @@ -100,9 +92,7 @@ merge_household_var <- function(df, year, add_labels=NULL, showProgress=T){
df_household <- dplyr::select(df_household, -all_of(vars_to_drop))

# merge
nrow(df)
temp_df <- dplyr::left_join(df, df_household)
nrow(df)

return(temp_df)
}
1 change: 1 addition & 0 deletions tests/tests_rafa/test_rafa.R
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ urlchecker::url_update()
# Check package errors

# run only the tests
Sys.setenv(NOT_CRAN = "true")
testthat::test_local()

# LOCAL
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test_labels_emigration.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ test_that("add_labels_emigration", {

# sem labels
test1a <- read_emigration(year = 2010, add_labels = NULL, columns = c('abbrev_state', 'V1006')) |>
filter(abbrev_state == 'RO')
dplyr::filter(abbrev_state == 'RO')

# com labels
test1b <- censobr:::add_labels_emigration(arrw = test1a, year=2010, lang = 'pt') |>
filter(abbrev_state == 'RO')
dplyr::filter(abbrev_state == 'RO')

test1a <- as.data.frame(test1a)
test1b <- as.data.frame(test1b)
test1a <- dplyr::collect(test1a)
test1b <- dplyr::collect(test1b)
# add labels
testthat::expect_true('1' %in% test1a$V1006)
testthat::expect_true('Urbana' %in% test1b$V1006)
Expand Down
10 changes: 4 additions & 6 deletions tests/testthat/test_labels_families.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,19 @@ test_that("add_labels_families", {

# sem labels
test1a <- read_families(year = 2000, add_labels = NULL, columns = c('abbrev_state', 'CODV0404_2')) |>
filter(abbrev_state == 'RO')
dplyr::filter(abbrev_state == 'RO')

# com labels
test1b <- censobr:::add_labels_families(arrw = test1a, year=2000, lang = 'pt') |>
filter(abbrev_state == 'RO')
dplyr::filter(abbrev_state == 'RO')

test1a <- as.data.frame(test1a)
test1b <- as.data.frame(test1b)
test1a <- dplyr::collect(test1a)
test1b <- dplyr::collect(test1b)

# add labels
testthat::expect_true('01' %in% test1a$CODV0404_2)
testthat::expect_true('Casal sem filhos' %in% test1b$CODV0404_2)



})


Expand Down
Loading

0 comments on commit 19b0799

Please sign in to comment.