Merge branch 'release/v0.1.0' into main
ChristopherMarais committed Sep 2, 2024
2 parents 5a6396e + d3ffdb6 commit e062568
Showing 65 changed files with 1,268 additions and 464 deletions.
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions 01_Data_raw/Biodiversity/PLACEHOLDER.txt
@@ -0,0 +1,2 @@
REMOVE THIS FILE AFTER ADDING DATA TO THIS FOLDER.
This is just so the folders are saved to Git.
2 changes: 2 additions & 0 deletions 01_Data_raw/Hydrology/PLACEHOLDER.txt
@@ -0,0 +1,2 @@
REMOVE THIS FILE AFTER ADDING DATA TO THIS FOLDER.
This is just so the folders are saved to Git.
2 changes: 2 additions & 0 deletions 01_Data_raw/Shellfish/PLACEHOLDER.txt
@@ -0,0 +1,2 @@
REMOVE THIS FILE AFTER ADDING DATA TO THIS FOLDER.
This is just so the folders are saved to Git.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -1,9 +1,10 @@
site,alternate_name,site_friendly,station_code,site_acronym,lat,long,start_date,end,wbid,location
site,alternate_name,site_friendly,StationCode,site_acronym,Latitude,Longitude,start_date,end,wbid,location
MICKLERS,N,Micklers,GTMMKNUT,MK,30.16073611,-81.36027778,,N,Lake,water_control
DEPGL1,N,Guana Lake 1,GTMGL1NUT,GL1,30.1504,-81.3604,,N,Lake,open_water
DEPGL2,N,Guana Lake 2,GTMGL2NUT,GL2,30.1161,-81.3511,,N,Lake,open_water
LAKE MIDDLE,DEPGL3,Lake Middle,GTMLMNUT,LM,30.08302,-81.34286,,N,Lake,open_water
LAKE MIDDLE,N,Lake Middle,GTMOLNUT,LM,30.08302,-81.34286,,Y,Lake,open_water
LAKE MIDDLE,N,Lake Middle,GTMOLNUT_dup,LM,30.08302,-81.34286,,Y,Lake,open_water
DEPGL4,N,Guana Lake 4,GTMGL4NUT,GL4,30.0451,-81.3351,,N,Lake,open_water
LAKE SOUTH,N,Lake South,GTMDNNUT,LS,30.023763,-81.327928,,Y,Lake,water_control
LAKE SOUTH,N,Lake South,GTMLSNUT,LS,30.023763,-81.327928,,N,Lake,water_control
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions 01_Data_raw/Weather/PLACEHOLDER.txt
@@ -0,0 +1,2 @@
REMOVE THIS FILE AFTER ADDING DATA TO THIS FOLDER.
This is just so the folders are saved to Git.
11 changes: 3 additions & 8 deletions 02_Cleaning_scripts/HAB_FWC.R
@@ -11,13 +11,7 @@

library(tidyverse)

HAB <- read_csv("01_Data_raw/HAB/HAB_FWC.csv")

# Save it as an .Rdata file so it can be read into the Shiny app
save(HAB, file = "03_Data_for_app/HAB.RData")

saveRDS(HAB, "03_Data_for_app/HAB.Rds")

HAB <- read_csv("01_Data_raw/Water_Quality/HAB/HAB_FWC.csv")


# Create long format so it can be used in the Shiny app
@@ -28,7 +22,8 @@ HAB <- HAB %>%
names_to = "vars", values_to = "vals")
HAB$Date <- dmy(HAB$`Sample Date`)
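# (For context: dmy() parses day-month-year text, so a hypothetical `Sample Date`
# value such as "02/09/2024" becomes the Date 2024-09-02.)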

save(HAB, file = "03_Data_for_app/HAB.RData")
# Save it as an .Rds file so it can be read into the Shiny app
saveRDS(HAB, "03_Data_for_app/HAB.Rds")

# Check all unique categories
unique(HAB$Description)
54 changes: 32 additions & 22 deletions 02_Cleaning_scripts/WIN_data_clean.R
@@ -9,7 +9,24 @@ library(tidyverse)
GTMNERR <- st_read("03_Data_for_app/shapefiles_new/counties_GTMNERR.shp")
# CRS: NAD83 / UTM zone 17N
# WIN Data
gps_data <- read.csv("./01_Data_raw/WIN/WIN_data_merged_20240501.csv")

# Select only the variables we want straight away; this saves resources. Pick
# the columns that have "Org" in the name because those are the units/names/etc.
# submitted by the sampling organization - which should be the GTM for this (?)
gps_data <- read_csv("./01_Data_raw/Water_Quality/WIN/WIN_data_merged_20240501.csv",
col_select = c(`Monitoring Location ID`, `Activity Type`,
`Activity Start Date Time`, `Activity Depth`,
`DEP Result ID`, `Org Analyte Name`, `Org Result Value`,
`Org Result Unit`, `Org MDL`, `RowID`, `LocationID`,
`Station ID`, `Station Name`, `Station Type`, `County`,
`Location_1`, `Location_2`))

lookup_names <- read_csv("03_Data_for_app/WQ_lookup_names.csv")

# Change column names so we can later merge this with other WQ data
recode_vec <- setNames(lookup_names$original_name, lookup_names$dashboard_name)
gps_data <- gps_data %>%
rename(any_of(recode_vec))
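
# Toy illustration of the rename step (hypothetical lookup rows, not the real
# WQ_lookup_names.csv): if dashboard_name is c("Latitude", "Longitude") and
# original_name is c("Location_1", "Location_2"), setNames() builds
# c(Latitude = "Location_1", Longitude = "Location_2"), and rename(any_of(...))
# then renames only those columns that are actually present, e.g.
# tibble(Location_1 = 30.1, Location_2 = -81.3) %>%
#   rename(any_of(c(Latitude = "Location_1", Longitude = "Location_2")))
# returns a tibble with columns Latitude and Longitude.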

#### GTMNERR boundary and aquatic preserves ####

@@ -41,7 +58,7 @@ bound_box <- st_bbox(st_sfc(pt1, pt3, pt4, pt2, crs = st_crs(GTMNERR)))

# Filter GPS coordinates
# Convert to sf object
gps_sf <- st_as_sf(gps_data, coords = c("Location_2", "Location_1"), crs = 4326)
gps_sf <- st_as_sf(gps_data, coords = c("Longitude", "Latitude"), crs = 4326)

# Crop GPS points within the bounding box
gps_cropped <- st_crop(gps_sf, bound_box)
@@ -65,16 +82,16 @@ coordinates <- st_coordinates(gps_cropped)
WIN_df <- cbind(WIN_df, coordinates)

# Rename the coordinates columns if necessary
colnames(WIN_df)[(ncol(WIN_df)-1):ncol(WIN_df)] <- c("longitude", "latitude")
colnames(WIN_df)[(ncol(WIN_df)-1):ncol(WIN_df)] <- c("Longitude", "Latitude")

#### Keep only columns with varying information ####
# Function to remove columns with the same value in the whole column
remove_constant_columns <- function(df) {
df <- df[, sapply(df, function(col) length(unique(col)) > 1)]
return(df)
}

WIN_df <- remove_constant_columns(WIN_df)
# remove_constant_columns <- function(df) {
# df <- df[, sapply(df, function(col) length(unique(col)) > 1)]
# return(df)
# }
#
# WIN_df <- remove_constant_columns(WIN_df)

#### Reformat data to visualize easily ####
# turn WIN_df into long format with the following columns
@@ -83,29 +100,22 @@ WIN_df <- remove_constant_columns(WIN_df)

# Convert all columns to character before pivoting and retain the original row identifier
WIN_df <- WIN_df %>%
select(-all_of(c("Station.ID",
"Station.Name",
"Org.Analyte.Name",
"DEP.Result.Value.Number",
"DEP.MDL",
"DEP.PQL",
"Org.Detection.Unit",
"Org.Result.Unit",
"Activity.End.Date.Time"))) %>%
# Add a column to record the data source/provider
mutate(data_source = "WIN") %>% # or change this to DEP?
mutate(across(everything(), as.character)) %>%
mutate_all(~ na_if(., "")) %>%
pivot_longer(
cols = -c(RowID), # Exclude the Row_ID column from pivoting
names_to = "variable",
values_to = "value"
) %>%
mutate(value = na_if(value, ""))
)
# filter(!is.na(value) & value != "") # use this if space is an issue
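
# Shape sketch (made-up values): a wide row such as
#   tibble(RowID = "1", `Station Name` = "Micklers", `Org Result Value` = "7.2")
# pivots into three-column long format,
#   RowID  variable          value
#   1      Station Name      Micklers
#   1      Org Result Value  7.2
# i.e. every measurement and every piece of station metadata becomes its own row
# keyed by RowID.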

#### Save data ####
# Save the filtered data to a new CSV file
# write.csv(WIN_df,
# "03_Data_for_app/Filtered_WIN_data_merged_20240501.csv",
# row.names = FALSE)
# Save the filtered data to a .RData file
save(WIN_df, file = "03_Data_for_app/WIN.RData")
# Save the filtered data to a .RDs file

saveRDS(WIN_df, "03_Data_for_app/WIN.Rds")
60 changes: 45 additions & 15 deletions 02_Cleaning_scripts/WQ_GTMNERR.R
@@ -13,7 +13,7 @@ library(tidyverse)
library(readxl)

### 1. Read in data -----------------------------------------------------------
WQ <- read_excel("01_Data_raw/Guana_WQ/Guana_masterdata.xlsx",
WQ <- read_excel("01_Data_raw/Water_Quality/Guana_WQ/Guana_masterdata.xlsx",
sheet = 1, # There is only one sheet, but just to be safe
guess_max = 13000) # This is not ideal + cols 14 and 16 have a
# mix of logical and numbers. Lord.
@@ -24,10 +24,17 @@ WQ <- read_excel("01_Data_raw/Guana_WQ/Guana_masterdata.xlsx",
#col_types = c("SampleDate" = "date", "#RQ" = "text")) # If not specified you get
# warnings (as it expects logical; text only starts after row 1445)

WQ_meta <- read_csv("01_Data_raw/Guana_WQ/guana_data_dictionary_updateGK.csv")
WQ_meta <- read_csv("01_Data_raw/Water_Quality/Guana_WQ/guana_data_dictionary_updateGK.csv")
# Some stations have two codes due to a name change (see Word doc with metadata)
# Don't remove

lookup_names <- read_csv("03_Data_for_app/WQ_lookup_names.csv")

# Change column names so we can later merge this with other WQ data
recode_vec <- setNames(lookup_names$original_name, lookup_names$dashboard_name)
WQ <- WQ %>%
rename(any_of(recode_vec))

### 2. Check categorical values ------------------------------------------------
# Check station names, componentLong and componentShort (spelling etc)
unique(WQ$StationCode)
@@ -60,38 +67,61 @@ unique(WQ$Remark) # inconsistent... But there are some capital letters that
# Change station code column name so it is the same as the column in the data
names(WQ_meta)[names(WQ_meta) == "station_code"] <- "StationCode"

WQ$StationCode <- str_trim(WQ$StationCode)

WQ <- WQ %>%
left_join(WQ_meta) %>%
select(-Lat, -Long)
left_join(WQ_meta, by = c("StationCode")) %>% # not all stations in the original have lat/lon
mutate(Latitude = coalesce(Latitude.y, Latitude.x),
Longitude = coalesce(Longitude.y, Longitude.x)) %>%
select(-c(Latitude.x, Latitude.y, Longitude.x, Longitude.y))
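
# coalesce() keeps the first non-NA value, so the metadata coordinates (the .y
# columns from WQ_meta) take precedence and the original columns only fill in
# where the metadata is missing, e.g. coalesce(c(30.16, NA), c(30.10, 30.02))
# returns c(30.16, 30.02) (made-up coordinates).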

# Stations missing from metadata: GL1.5, GL2.5 and GL3.5 -> added manually and
# emailed Nikki

which(is.na(WQ$lat))
which(is.na(WQ$long))
# Check
which(is.na(WQ$Latitude))
which(is.na(WQ$Longitude))

# There is GTMOLNUT and GTMOLNUT_dup...? I think we can leave them in for now; I give
# them the same site, site acronym and site friendly name...
# And GTMLMNUT is the same? But DEP code? Fix this...

# WQ[which(is.na(WQ$Latitude)),] # duplicates??
# WQ <- WQ[-which(is.na(WQ$Latitude)),]

# Some station codes appear to have changed over time. Make sure only one code is
# reflected (otherwise we have issues with the dashboard)
# GTMDNNUT -> GTMLSNUT (Lake South)
# GTMDSNUT -> GTMRNNUT (River North)
# GTMOLNUT and GTMOLNUT_dup -> GTMLMNUT

replacement <- data.frame(StationCode = c("GTMDNNUT", "GTMDSNUT", "GTMOLNUT", "GTMOLNUT_dup"),
StationCode_repl = c("GTMLSNUT", "GTMRNNUT", "GTMLMNUT", "GTMLMNUT"))

WQ[which(is.na(WQ$lat)),] # duplicates?? Remove for now; emailed Nikki
WQ <- WQ[-which(is.na(WQ$lat)),]
WQ <- WQ %>%
left_join(replacement, by = "StationCode") %>%
mutate(StationCode = coalesce(StationCode_repl, StationCode)) %>%
select(-StationCode_repl)

# Create a separate dataframe with only station info, not the data (makes map
# too heavy)
WQ_locations <- WQ %>%
mutate(Year = year(SampleDate)) %>%
select(site_friendly, Year, site_acronym, lat, long, wbid, location) %>%
group_by(site_friendly, site_acronym, lat, long, wbid, location) %>%
summarize(maxYear = max(Year), minYear = min(Year))
select(site_friendly, Year, site_acronym, Latitude, Longitude, wbid, location) %>%
group_by(site_friendly, site_acronym, Latitude, Longitude, wbid, location) %>%
summarize(maxYear = max(Year), minYear = min(Year)) %>%
mutate(type = "Water quality",
dataset = "Guana Water Quality Monitoring (GTMNERR)")

WQ_data_available <- WQ %>%
mutate(Year = year(SampleDate)) %>%
select(StationCode, Year, SampleType, ComponentShort, ComponentLong, site_friendly,
site_acronym, lat, long, wbid, location) %>%
site_acronym, Latitude, Longitude, wbid, location) %>%
distinct()

### 4. Save data ---------------------------------------------------------------

# Save it as an .Rdata (and .Rds?) file so it can be read into the Shiny app
save(WQ, file = "03_Data_for_app/WQ.RData")
# Save it as an .Rds file so it can be read into the Shiny app
saveRDS(WQ, "03_Data_for_app/WQ.Rds")

save(WQ_locations, file = "03_Data_for_app/WQ_locations.RData")
saveRDS(WQ_locations, "03_Data_for_app/WQ_locations.Rds")
125 changes: 125 additions & 0 deletions 02_Cleaning_scripts/WQ_WIN_merge.R
@@ -0,0 +1,125 @@
########################################################################
########## NERRS Science Transfer project - GTMNERR #############
########################################################################

# Geraldine Klarenberg, PhD
# [email protected]
# 12 August 2024

# Load packages
library(tidyverse)
library(sf) # st_as_sf() further down needs the sf package

# Merge WIN and WQ data into one

WIN <- readRDS("03_Data_for_app/WIN.Rds")
WQ_GTMNERR <- readRDS("03_Data_for_app/WQ.Rds")

# Make WQ_GTMNERR long format as well, just like WIN
# Having a column for the dates is advisable though, as it is a separate data
# type. Update that later, not right now.

#### We need to update the variable list so the names are the same!

# First make sure that every row has a UNID and also add a column for the data
# source / provider
WQ_GTMNERR <- WQ_GTMNERR %>%
mutate(data_source = "GTMNERR") %>%
arrange(UNID, StationCode, SampleDate, ComponentShort)

for (i in 1:nrow(WQ_GTMNERR)){
if (is.na(WQ_GTMNERR$UNID[i])){
WQ_GTMNERR$UNID[i] <- WQ_GTMNERR$UNID[i-1] + 1
}
}
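
# Toy example of what the loop does (hypothetical UNIDs): after the arrange(),
# c(100, NA, NA, 250) becomes c(100, 101, 102, 250) -- each missing UNID is the
# previous row's UNID plus one. This assumes the first row always has a UNID.
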
# Check there are no duplicates
sum(duplicated(WQ_GTMNERR$UNID))

# Add a geometry column, to later use for clicking markers (we might change this)
WQ_GTMNERR <- st_as_sf(WQ_GTMNERR, coords = c("Longitude", "Latitude"),
crs = 4326, remove = FALSE)
# Turn back into dataframe with geometry as a column
WQ_GTMNERR <- as.data.frame(WQ_GTMNERR)

WQ_GTMNERR_long <- WQ_GTMNERR %>%
mutate(across(everything(), as.character)) %>%
mutate_all(~ na_if(., "")) %>%
pivot_longer(cols = -UNID,
names_to = "variable",
values_to = "value")

# How to deal with UNID when merging? Start counting anew (or add however far the
# one dataset is?)

min(as.numeric(WQ_GTMNERR_long$UNID)) # 1
max(as.numeric(WQ_GTMNERR_long$UNID)) # 17098

min(as.numeric(WIN$RowID)) # 55135
max(as.numeric(WIN$RowID)) # 3677602
# Appears there will be no overlap. Merge.

WQ_GTMNERR_long <- WQ_GTMNERR_long %>%
rename(RowID = UNID)

WQ_all <- WIN %>%
full_join(WQ_GTMNERR_long)

unique(WQ_all$variable)
unique(WQ_all$value)

# Read in the WQ vars lookup table and replace variables with the names we need
lookup_WQ_vars <- read_csv("03_Data_for_app/WQ_lookup_variables.csv")

# # I am sure there is a nicer/quicker/tidyverse way of doing this, but whatevs for now
# for (i in 1:nrow(WQ_all)){
# if (is.na(WQ_all$value[i])){
# next
# }
# for (j in 1:nrow(lookup_WQ_vars)){
# if (WQ_all$value[i] == lookup_WQ_vars$value[j]){
# WQ_all$value[i] <- lookup_WQ_vars$new[j]
# }
# }
# }

# Quicker way
WQ_all <- WQ_all %>%
left_join(lookup_WQ_vars, by = "value") %>%
mutate(value = coalesce(new, value)) %>%
select(-new)

# Filter for only the things that we need:

# In ComponentLong:

# Air temperature
# Ammonium (filtered)
# Chlorophyll
# Chlorophyll a (corrected)
# Chlorophyll a (uncorrected)
# Fecal coliform
# Dissolved oxygen
# Organic carbon
# pH
# Salinity
# Specific conductance
# Total nitrogen (TKN + nitrate + nitrite)
# Phosphorus (total)
# Total dissolved solids
# Turbidity
# Water temperature

# I did this as follows for now but I am NOT happy about it and we should update it,
# because this only removes the variable names but keeps everything else
# associated with that station (which might make things unnecessarily slow).
# Maybe we can do it by RowID? (see the sketch below the filter)
selected_values <- c("Air temperature", "Ammonium (filtered)", "Chlorophyll", "Chlorophyll a (corrected)",
"Chlorophyll a (uncorrected)", "Fecal coliform", "Dissolved oxygen", "Organic carbon",
"pH", "Salinity", "Specific conductance", "Total nitrogen (TKN + nitrate + nitrite)",
"Phosphorus (total)", "Total dissolved solids", "Turbidity", "Water temperature")

WQ_all <- WQ_all %>%
filter((variable == "ComponentLong" & value %in% selected_values) | variable != "ComponentLong")
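
# A possible RowID-based alternative (untested sketch): first find the RowIDs
# whose ComponentLong is one of the selected values, then keep only the rows with
# those RowIDs, so every other variable tied to a dropped record disappears too.
# This assumes each record we want to keep actually has a ComponentLong row.
# keep_ids <- WQ_all %>%
#   filter(variable == "ComponentLong", value %in% selected_values) %>%
#   pull(RowID)
# WQ_all <- WQ_all %>%
#   filter(RowID %in% keep_ids)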


# Save data
saveRDS(WQ_all, "03_Data_for_app/WQ_all.Rds")