Skip to content

Commit

Permalink
add check for fields that should have a one to one relationship, stil…
Browse files Browse the repository at this point in the history
…l not implemented in the tool
  • Loading branch information
rubenpp7 committed Aug 4, 2023
1 parent 233ef7f commit 7d7687c
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export(leftfrom)
export(loopcheckIPTdataset)
export(loopfitnessandqc)
export(midstring)
export(one_to_one_check)
export(reversetaxmatch)
export(richtfrom)
export(txt_to_cols)
Expand Down
67 changes: 67 additions & 0 deletions R/one_to_one_check.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#' Check for a one-to-one correspondence between two fields of a data frame
#'
#' Highlights the records where a one-to-one relationship is expected between
#' two fields but is not found. Usually relevant for pairs of fields such as
#' scientificName and scientificNameID.
#'
#' @param df mandatory. A data frame that contains at least two fields that
#'   must have a one-to-one relationship.
#' @param field_x mandatory. A single character string with the name of a
#'   field of `df` to be compared against `field_y`.
#' @param field_y mandatory. A single character string with the name of a
#'   field of `df` to be compared against `field_x`.
#' @return A data frame with the distinct `field_x` / `field_y` combinations
#'   that break the one-to-one relationship. It holds the two fields, a column
#'   `n` (number of distinct partner values found for the duplicated key) and
#'   a column `issue` describing the direction of the problem. It has zero
#'   rows when the relationship holds. If the arguments are invalid, a warning
#'   is raised and `NULL` is returned.
#' @import dplyr
#' @export
#' @examples
#' \dontrun{
#' output <- one_to_one_check(df = Occurrence, field_x = "scientificNameID", field_y = "scientificName")
#' output <- one_to_one_check(df = eMoF, field_x = "measurementTypeID", field_y = "measurementType")
#' output <- one_to_one_check(df = eMoF, field_x = "measurementValueID", field_y = "measurementValue")
#' output <- one_to_one_check(df = eMoF, field_x = "measurementUnitID", field_y = "measurementUnit")
#' output <- one_to_one_check(df = Event, field_x = "institutionID", field_y = "institutionCode")
#' }
one_to_one_check <- function(df, field_x, field_y) {

  # Scalar, short-circuiting validation: names(df) is only consulted once df
  # is known to be a data frame and both field names are single strings.
  valid_input <- is.data.frame(df) &&
    is.character(field_x) && length(field_x) == 1L &&
    is.character(field_y) && length(field_y) == 1L &&
    field_x %in% names(df) &&
    field_y %in% names(df)

  if (!valid_input) {
    warning("--------> Either the field_x and field_y arguments are not character strings, OR they are not fields in df OR df is not a data frame <--------")
    # Return explicitly so the caller gets NULL rather than an error about an
    # undefined object after the warning.
    return(NULL)
  }

  # Distinct combinations of the two fields actually present in the data
  unique_pairs <- df %>%
    select(all_of(c(field_x, field_y))) %>%
    distinct()

  # Returns the pairs whose `key` value is linked to more than one distinct
  # `other` value, labelled with the corresponding issue text.
  flag_duplicates <- function(key, other) {
    unique_pairs %>%
      group_by(across(all_of(key))) %>%
      summarise(n = n(), .groups = "drop") %>%
      filter(n > 1) %>%
      left_join(unique_pairs, by = key) %>%
      distinct() %>%
      mutate(issue = paste0("multiple ", other, " per ", key))
  }

  # Both directions are checked: several field_x values per field_y first,
  # then several field_y values per field_x.
  bind_rows(
    flag_duplicates(key = field_y, other = field_x),
    flag_duplicates(key = field_x, other = field_y)
  )
}


28 changes: 28 additions & 0 deletions man/one_to_one_check.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7d7687c

Please sign in to comment.