Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleaning discrete gw sw meas data #58

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
c291fef
adding meas sites to streamorder 3 categorization process
msleckman Dec 20, 2022
3153055
adding commenting to p2_nwis_dv_sw_data
msleckman Dec 20, 2022
7117ee4
adding p2_nwis_meas_sw_data
msleckman Dec 20, 2022
934c675
transformed process of adding stream order cols to sw data into a new…
msleckman Dec 21, 2022
673495b
rename of add_stream_order param
msleckman Dec 21, 2022
0e224e1
updated 4_outputs folder name and updated gitignore
msleckman Dec 21, 2022
8df426d
editing/splittingcleaning of sw and gw functions
msleckman Dec 22, 2022
c31f13d
streamlining changes with cleaning of gw meas data
msleckman Dec 22, 2022
6322758
rnamed sites_in_waterbody script to more general process_nwis_data.R …
msleckman Dec 22, 2022
aef42ff
rname 4_export.R to 4_outputs + rm 4_reports and updated commenting
msleckman Dec 22, 2022
1de4825
adding correct sourcing following changes in filenames;
msleckman Dec 22, 2022
30bcfde
commenting
msleckman Dec 23, 2022
8b56d2f
alignment
msleckman Dec 23, 2022
541d70a
merging
msleckman Dec 23, 2022
aa78552
renaming 2_process_data.R
msleckman Jan 3, 2023
6f0a3a0
spacing in proces_nwis_data.R
msleckman Jan 3, 2023
f7511a4
adding 2_process_site_data.R because was deleted when renaming in aa7…
msleckman Jan 3, 2023
9744076
so3 naming additions
msleckman Jan 3, 2023
367f159
rs formatting causing mg conflict
msleckman Jan 3, 2023
e42aa22
.
msleckman Jan 3, 2023
90c82bb
update targets list name without sw or gw
msleckman Jan 3, 2023
8dc976c
take off space
msleckman Jan 3, 2023
4d9df23
deleted lake tribs R file to enable merge
msleckman Jan 3, 2023
60f2e80
re-adding 2_process_lakes_tribs to see if cnflict re-appears
msleckman Jan 3, 2023
0203c13
nvmd, merge conflicts it removing again
msleckman Jan 3, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
.Renviron
.Renvironment
1_fetch/in/*
4_outputs/out/*
!1_fetch/in/SalineLakeBnds/SalineLakeBnds.shp
!1_fetch/in/SalineLakeBnds/
!1_fetch/in/saline_lakes.csv
Expand All @@ -15,5 +16,7 @@
*.csv
*.xlsx
*.shp
*.rds

_targets/
.DS_Store
63 changes: 63 additions & 0 deletions 2_process/src/process_nwis_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#' @title sites_along_waterbody
#' @description returns the site numbers of the sites that fall within a 250 m buffer of a water body
#' @param sites_sf sf object of sf sites and measurements. Must contain a `site_no` col
#' @param waterbody_sf sf object of water body that will be buffered.
#' When `lake_waterbody = FALSE` it must contain `comid` and `streamorde` cols
#' @param lake_waterbody single TRUE/FALSE flag: whether water body is lake or not. Default is FALSE
#' @return vector of unique `site_no` values of the sites that intersect the buffered water body

sites_along_waterbody <- function(sites_sf, waterbody_sf, lake_waterbody = FALSE){

  ## `if()` needs a single non-missing logical - fail early with a clear message otherwise
  stopifnot(is.logical(lake_waterbody),
            length(lake_waterbody) == 1,
            !is.na(lake_waterbody))

  ## only site_no is needed to identify the sites in the spatial join below
  sites_sf <- sites_sf %>%
    select(site_no)

  ## running st_union for the tributary shp because it smooths the buffer and the polygons overlap less.
  ## Not feasible for lakes due to specific selection of columns
  if(lake_waterbody){
    waterbody_buffered <- waterbody_sf %>%
      sf::st_buffer(dist = units::set_units(250, m))
  }else{
    waterbody_buffered <- waterbody_sf %>%
      group_by(comid, streamorde) %>%
      ## .groups = 'drop' so that the buffered output does not stay grouped by comid
      summarize(geometry = sf::st_union(geometry), .groups = 'drop') %>%
      sf::st_buffer(dist = units::set_units(250, m))
  }

  ## inner spatial join (left = FALSE) keeps only the sites matched to the buffered water body
  sf::st_join(sites_sf, waterbody_buffered, left = FALSE) %>%
    pull(site_no) %>%
    unique()

}

#' @title join_site_spatial_info
#' @description function that attaches nwis site info and lon/lat coordinate cols to nwis sw or gw data
#' @param nwis_data nwis sw or gw data pulled from dataRetrieval::readNWISdata() or similar dataRetrieval functions
#' @param sites_sf sf object of sf sites and measurements. Its geometry is the source of the lon/lat cols
#' @param join_site_col name of the col used to join sites_sf with nwis_data. Default is 'site_no' as that is expected to be common between both dfs
#' @return nwis_data with the sites_sf cols joined on, plus `lon` and `lat` cols, and without the geometry col

join_site_spatial_info <- function(nwis_data, sites_sf, join_site_col = 'site_no'){

  ## every data row gets the cols of its site (incl. geometry); result is converted to sf
  data_with_geom <- st_as_sf(left_join(nwis_data, sites_sf, by = join_site_col))

  ## coordinate matrix of the joined geometries: 1st col is stored as lon, 2nd col as lat
  site_xy <- st_coordinates(data_with_geom)

  data_with_geom %>%
    mutate(lon = site_xy[,1], lat = site_xy[,2]) %>%
    ## coords are now regular cols, so the geometry col is no longer needed
    st_drop_geometry()
}

#' @title add_stream_order
#' @description function that keeps the lake, wetland and stream sites of nwis sw data and adds a stream order category col
#' @param nwis_sw_data nwis sw data pulled from dataRetrieval::readNWISdata() or similar dataRetrieval functions.
#' Must contain `site_tp_cd` and `site_no` cols
#' @param sites_along_streamorder3 vector of site_no that are along stream order 3 streams. Output of sites_along_waterbody()
#' @param sites_along_lake vector of sites that are adjacent to saline lakes. Output of sites_along_waterbody()
#' @return nwis_sw_data filtered to site types 'LK', 'WE' and those starting with 'ST', with a new
#' `stream_order_category` col: 'along SO 3+', 'along lake' or 'not along SO 3+'

add_stream_order <- function(nwis_sw_data, sites_along_streamorder3, sites_along_lake){

  nwis_sw_data %>%
    ## keep lake (LK), wetland (WE) and stream (ST, ST-*) sites only.
    ## Pattern is anchored ('^ST') to match the case_when() below - an unanchored 'ST' also lets through
    ## non-stream site types that merely contain 'ST' (e.g. NWIS facility code 'FA-STS')
    filter(site_tp_cd %in% c('LK','WE') | grepl('^ST', site_tp_cd)) %>%
    ## create stream_order_category col depending on site type & match with sites stream order/lake vector targets.
    ## case_when() takes the first matching condition, so a stream site along SO 3+ is labelled
    ## 'along SO 3+' even if it is also listed in sites_along_lake
    mutate(
      stream_order_category = case_when(
        grepl('^ST', site_tp_cd) & site_no %in% sites_along_streamorder3 ~ 'along SO 3+',
        site_tp_cd == 'LK' | site_no %in% sites_along_lake ~ 'along lake',
        TRUE ~ 'not along SO 3+'))
}
28 changes: 0 additions & 28 deletions 2_process/src/sites_along_waterbody.R

This file was deleted.

66 changes: 0 additions & 66 deletions 2_process_lakes_tribs.R

This file was deleted.

126 changes: 126 additions & 0 deletions 2_process_site_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
source('2_process/src/process_nwis_data.R')

## targets that attach site info (site type, lon/lat) to the p1 nwis gw and sw data
## and that categorize sw sites by their position along SO 3+ tributaries or saline lakes
p2_site_targets_list <- list(

  ## simplified version of all sites in watersheds (regardless of whether there is relevant data at that site) + all site types
  tar_target(
    p2_site_in_watersheds_sf,
    p1_site_in_watersheds_sf %>%
      select(site_tp_cd, site_no, Name, geometry)
  ),

  ## get just gw sites with outputted data for 2000-2020 (timeframe to change)
  tar_target(
    p2_nwis_dv_gw_data,
    ## same join + lon/lat extraction as for the other nwis targets below
    join_site_spatial_info(nwis_data = p1_nwis_dv_gw_data,
                           sites_sf = p2_site_in_watersheds_sf,
                           join_site_col = 'site_no') %>%
      filter(grepl('GW',site_tp_cd)) %>%
      ## quickly re-organizing cols so that measurement cols (X_*) come last
      select(!starts_with('X_'),starts_with('X_'))
  ),

  ## getting all sites along the stream order 3 lake tributaries
  ## this takes 5+ minutes due to time for buffer of tributaries to generate
  tar_target(
    p2_sw_streamorder3_sites,
    sites_along_waterbody(p2_site_in_watersheds_sf,
                          p2_lake_tributaries_so3,
                          lake_waterbody = FALSE)
  ),

  ## getting all sites along lake
  tar_target(
    p2_sw_in_lake_sites,
    sites_along_waterbody(p2_site_in_watersheds_sf,
                          p2_saline_lakes_sf,
                          lake_waterbody = TRUE)
  ),

  ## get just cont dv sw sites with outputted data for 2000-2022 with stream order category column
  tar_target(
    p2_nwis_dv_sw_data,
    join_site_spatial_info(nwis_data = p1_nwis_dv_sw_data,
                           sites_sf = p2_site_in_watersheds_sf,
                           join_site_col = 'site_no') %>%
      add_stream_order(nwis_sw_data = .,
                       sites_along_streamorder3 = p2_sw_streamorder3_sites,
                       sites_along_lake = p2_sw_in_lake_sites) %>%
      ## re-organizing cols so that measurements cols come after non-measurement cols
      select(!starts_with('X_'),
             starts_with('X_'))
  ),

  ## get just discrete sw sites with outputted data for 2000-2022 with stream order category column
  tar_target(
    p2_nwis_meas_sw_data,
    join_site_spatial_info(nwis_data = p1_nwis_meas_sw_data,
                           sites_sf = p2_site_in_watersheds_sf,
                           join_site_col = 'site_no') %>%
      add_stream_order(nwis_sw_data = .,
                       sites_along_streamorder3 = p2_sw_streamorder3_sites,
                       sites_along_lake = p2_sw_in_lake_sites) %>%
      ## re-organizing cols so that lat and lon come last
      select(!c('lat','lon'), c('lat','lon'))
  ),

  ## get just discrete gw sites with outputted data for 2000-2022 (no stream order category column)
  ## Note there are several sites for gw that we are keeping. GW, GW-HZ (Hyporheic-zone well), GW-MW (mult. wells), GW-CR (collector/ranney well), GW-TH (Test hole not completed as a well)
  tar_target(
    p2_nwis_meas_gw_data,
    join_site_spatial_info(nwis_data = p1_nwis_meas_gw_data,
                           sites_sf = p2_site_in_watersheds_sf,
                           join_site_col = 'site_no') %>%
      ## both dfs have a site_tp_cd col so when joining, two versions are created (.x and .y).
      ## Resetting site_tp_cd to the sites_sf version (.y), then dropping the suffixed duplicates
      mutate(site_tp_cd = site_tp_cd.y) %>%
      ## ends_with() so that only the join suffixes are matched, not a '.x'/'.y' in the middle of a col name
      select(!ends_with(c('.x','.y'))) %>%
      ## re-organizing cols so that lat and lon come last
      select(!c('lat','lon'), c('lat','lon'))
  )
)

# # SW data -----------------------------------------------------------------
#
# p1_nwis_dv_sw_data_sf <- p1_nwis_dv_sw_data %>% left_join(sites_simplified, by = 'site_no')
#
# p1_nwis_dv_sw_data_sf$site_tp_cd %>% unique()
# # "ST" "ST-DCH" "LK" "FA-DV" "ST-CA" NA "SP"
#
# p1_nwis_dv_sw_data_sf %>% filter(site_tp_cd == 'FA-DV') %>% pull(site_no) %>% unique()
# ## 1 site has SW data for site_tp_cd FA-DV
#
# p1_nwis_dv_sw_data_sf %>% filter(site_tp_cd == 'SP') %>% pull(site_no) %>% unique()
# ## 6 sites has SW data for site_tp_cd SP
#
# p1_nwis_dv_sw_data_sf %>% filter(site_tp_cd == 'LK') %>% pull(site_no) %>% unique()
# ## 10 sites have SW data for site_tp_cd LK
#
#
# sw_sites <- p1_nwis_dv_sw_data_sf %>%
# filter(site_tp_cd %in% c('ST','LK')) %>%
# group_by(site_no,site_tp_cd, geometry) %>%
# summarize(n()) %>%
# st_as_sf()
#
# ## buffer tributaries
# tributaries_buffered <- p2_lake_tributaries %>%
# st_buffer(1000)
#
# ## buffer lakes
# lakes_buffered <- p2_saline_lakes_sf %>% st_buffer(1000)
#
# ## join lake and tribs
# ## / UPDATE: This takes too long, going to skip
# # lake_tributaries <- st_union(tributaries_buffered, lakes_buffered)
#
# # filtered_sites_along_tribs<- sf::st_filter(x = sw_sites,y = tributaries_buffered,
# # .predicate = sf::st_is_within_distance,
# # dist = units::set_units(0, m))
# ## /
#
# filtered_sites_along_lk <- st_join(sw_sites, lakes_buffered, left = FALSE)
# filtered_sites_along_tribs <- st_join(sw_sites, tributaries_buffered, left = FALSE)
#
# ## st_join creates tables that carry the lakes_buffered and tributaries_buffered cols - so just filtering the original table by site_no
# filtered_sw_sites_sf <- sw_sites %>% filter(site_no %in% c(filtered_sites_along_tribs$site_no,filtered_sites_along_lk$site_no))
#
#
#
# )
Loading