Skip to content

Commit

Permalink
Merge branch 'feature/33_feature' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
gklarenberg committed Aug 21, 2024
2 parents f9c610b + 505a3b3 commit 2da7f6d
Show file tree
Hide file tree
Showing 17 changed files with 678 additions and 419 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
site,alternate_name,site_friendly,station_code,site_acronym,lat,long,start_date,end,wbid,location
site,alternate_name,site_friendly,station_code,site_acronym,Latitude,Longitude,start_date,end,wbid,location
MICKLERS,N,Micklers,GTMMKNUT,MK,30.16073611,-81.36027778,,N,Lake,water_control
DEPGL1,N,Guana Lake 1,GTMGL1NUT,GL1,30.1504,-81.3604,,N,Lake,open_water
DEPGL2,N,Guana Lake 2,GTMGL2NUT,GL2,30.1161,-81.3511,,N,Lake,open_water
Expand Down
Binary file not shown.
34 changes: 22 additions & 12 deletions 02_Cleaning_scripts/WIN_data_clean.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@ library(tidyverse)
GTMNERR <- st_read("03_Data_for_app/shapefiles_new/counties_GTMNERR.shp")
# CRS: NAD83 / UTM zone 17N
# WIN Data
gps_data <- read.csv("./01_Data_raw/Water_Quality/WIN/WIN_data_merged_20240501.csv")
gps_data <- read_csv("./01_Data_raw/Water_Quality/WIN/WIN_data_merged_20240501.csv")

lookup_names <- read_csv("03_Data_for_app/WQ_lookup_names.csv")

# Change column names so we can later merge this with other WQ data
recode_vec <- setNames(lookup_names$original_name, lookup_names$dashboard_name)
gps_data <- gps_data %>%
rename(any_of(recode_vec))

#### GTMNERR boundary and aquatic preserves ####

Expand Down Expand Up @@ -41,7 +48,7 @@ bound_box <- st_bbox(st_sfc(pt1, pt3, pt4, pt2, crs = st_crs(GTMNERR)))

# Filter GPS coordinates
# Convert to sf object
gps_sf <- st_as_sf(gps_data, coords = c("Location_2", "Location_1"), crs = 4326)
gps_sf <- st_as_sf(gps_data, coords = c("Longitude", "Latitude"), crs = 4326)

# Crop GPS points within the bounding box
gps_cropped <- st_crop(gps_sf, bound_box)
Expand All @@ -65,7 +72,7 @@ coordinates <- st_coordinates(gps_cropped)
WIN_df <- cbind(WIN_df, coordinates)

# Rename the coordinates columns if necessary
colnames(WIN_df)[(ncol(WIN_df)-1):ncol(WIN_df)] <- c("longitude", "latitude")
colnames(WIN_df)[(ncol(WIN_df)-1):ncol(WIN_df)] <- c("Longitude", "Latitude")

#### Keep only columns with varying information ####
# Function to remove columns with the same value in the whole column
Expand All @@ -83,15 +90,17 @@ WIN_df <- remove_constant_columns(WIN_df)

# Convert all columns to character before pivoting and retain the original row identifier
WIN_df <- WIN_df %>%
select(-all_of(c("Station.ID",
"Station.Name",
"Org.Analyte.Name",
"DEP.Result.Value.Number",
"DEP.MDL",
"DEP.PQL",
"Org.Detection.Unit",
"Org.Result.Unit",
"Activity.End.Date.Time"))) %>%
select(-all_of(c("StationID",
"StationName",
"OrgAnalyteName",
"OrgResultValue", #GK: changed this from "DEPResultValueNumber", as I believe it is the DEP value we want to keep?
"DEPMDL",
"DEPPQL",
"OrgDetectionUnit",
"OrgResultUnit",
"ActivityEndDateTime"))) %>%
# Add a column to record the data source/provider
mutate(data_source = "WIN") %>% # or change this to DEP?
mutate(across(everything(), as.character)) %>%
mutate_all(~ na_if(., "")) %>%
pivot_longer(
Expand All @@ -108,4 +117,5 @@ WIN_df <- WIN_df %>%
# "03_Data_for_app/Filtered_WIN_data_merged_20240501.csv",
# row.names = FALSE)
# Save the filtered data to a .RDs file

saveRDS(WIN_df, "03_Data_for_app/WIN.Rds")
24 changes: 15 additions & 9 deletions 02_Cleaning_scripts/WQ_GTMNERR.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ WQ_meta <- read_csv("01_Data_raw/Water_Quality/Guana_WQ/guana_data_dictionary_up
# Some stations have two codes due to a name change (see Word doc with metadata)
# Don't remove

lookup_names <- read_csv("03_Data_for_app/WQ_lookup_names.csv")

# Change column names so we can later merge this with other WQ data
recode_vec <- setNames(lookup_names$original_name, lookup_names$dashboard_name)
WQ <- WQ %>%
rename(any_of(recode_vec))

### 2. Check categorical values ------------------------------------------------
# Check station names, componentLong and componentShort (spelling etc)
unique(WQ$StationCode)
Expand Down Expand Up @@ -61,32 +68,31 @@ unique(WQ$Remark) # inconsistent... But there are some capital letters that
names(WQ_meta)[names(WQ_meta) == "station_code"] <- "StationCode"

WQ <- WQ %>%
left_join(WQ_meta) %>%
select(-Lat, -Long)
left_join(WQ_meta)

# Stations missing from metadata: GL1.5, GL2.5 and GL3.5 -> added manually and
# emailed Nikki

which(is.na(WQ$lat))
which(is.na(WQ$long))
which(is.na(WQ$Latitude))
which(is.na(WQ$Longitude))

WQ[which(is.na(WQ$lat)),] # duplicates?? Remove for now; emailed Nikki
WQ <- WQ[-which(is.na(WQ$lat)),]
WQ[which(is.na(WQ$Latitude)),] # duplicates?? Remove for now; emailed Nikki
WQ <- WQ[-which(is.na(WQ$Latitude)),]

# Create a separate dataframe with only station info, not the data (makes map
# too heavy)
WQ_locations <- WQ %>%
mutate(Year = year(SampleDate)) %>%
select(site_friendly, Year, site_acronym, lat, long, wbid, location) %>%
group_by(site_friendly, site_acronym, lat, long, wbid, location) %>%
select(site_friendly, Year, site_acronym, Latitude, Longitude, wbid, location) %>%
group_by(site_friendly, site_acronym, Latitude, Longitude, wbid, location) %>%
summarize(maxYear = max(Year), minYear = min(Year)) %>%
mutate(type = "Water quality",
dataset = "Guana Water Quality Monitoring (GTMNERR)")

WQ_data_available <- WQ %>%
mutate(Year = year(SampleDate)) %>%
select(StationCode, Year, SampleType, ComponentShort, ComponentLong, site_friendly,
site_acronym, lat, long, wbid, location) %>%
site_acronym, Latitude, Longitude, wbid, location) %>%
distinct()

### 4. Save data ---------------------------------------------------------------
Expand Down
67 changes: 67 additions & 0 deletions 02_Cleaning_scripts/WQ_WIN_merge.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
########################################################################
########## NERRS Science Transfer project - GTMNERR #############
########################################################################

# Geraldine Klarenberg, PhD
# [email protected]
# 12 August 2024

# Load packages
library(tidyverse)
library(sf) # st_as_sf() below comes from sf, which tidyverse does not attach

# Merge WIN and WQ data into one long-format dataset (WQ_all.Rds)

WIN <- readRDS("03_Data_for_app/WIN.Rds")
WQ_GTMNERR <- readRDS("03_Data_for_app/WQ.Rds")

# Make WQ_GTMNERR long format as well, just like WIN
# Having a column for the dates is advisable though, as it is a separate data
# type. Update that later, not right now.

# First make sure that every row has a UNID and also add a column for the data
# source / provider
WQ_GTMNERR <- WQ_GTMNERR %>%
  mutate(data_source = "GTMNERR") %>%
  arrange(UNID, StationCode, SampleDate, ComponentShort)

# arrange() sorts missing values last, so all rows without a UNID now sit at
# the end of the table. Number them consecutively, continuing from the largest
# existing UNID (or starting at 1 when no row has a UNID at all). This gives
# the same IDs as filling row by row from the previous row, but also works
# when the first row is missing its UNID or the table is empty.
missing_unid <- which(is.na(WQ_GTMNERR$UNID))
if (length(missing_unid) > 0) {
  last_unid <- if (length(missing_unid) < nrow(WQ_GTMNERR)) {
    max(WQ_GTMNERR$UNID, na.rm = TRUE)
  } else {
    0
  }
  WQ_GTMNERR$UNID[missing_unid] <- last_unid + seq_along(missing_unid)
}

# Check there are no duplicates. UNID becomes the row identifier in the long
# table, so duplicates would mix up measurements from different rows. Warn
# (rather than stop) so the check is also visible when the script is sourced.
n_duplicated_unid <- sum(duplicated(WQ_GTMNERR$UNID))
if (n_duplicated_unid > 0) {
  warning(
    n_duplicated_unid, " duplicated UNID value(s) in WQ_GTMNERR",
    call. = FALSE
  )
}

# Add a geometry column, to later use for clicking markers (we might change this)
# remove = FALSE keeps the Longitude and Latitude columns next to the geometry
WQ_GTMNERR <- st_as_sf(
  WQ_GTMNERR,
  coords = c("Longitude", "Latitude"),
  crs = 4326,
  remove = FALSE
)
# Turn back into dataframe with geometry as a column
WQ_GTMNERR <- as.data.frame(WQ_GTMNERR)

# Convert every column to character (a single value column cannot hold mixed
# types), turn empty strings into NA, and pivot everything except UNID into
# variable / value pairs
WQ_GTMNERR_long <- WQ_GTMNERR %>%
  mutate(across(everything(), ~ na_if(as.character(.x), ""))) %>%
  pivot_longer(
    cols = -UNID,
    names_to = "variable",
    values_to = "value"
  )

# How to deal with UNID when merging? Start counting anew (or add however far the
# one dataset is?)

min(as.numeric(WQ_GTMNERR_long$UNID)) # 1
max(as.numeric(WQ_GTMNERR_long$UNID)) # 5016 (used to be 17098?)

min(as.numeric(WIN$RowID)) # 55135
max(as.numeric(WIN$RowID)) # 3677602
# Appears there will be no overlap. Merge.

WQ_GTMNERR_long <- WQ_GTMNERR_long %>%
  rename(RowID = UNID)

# No `by` is given, so dplyr joins on every column name the two tables share
WQ_all <- WIN %>%
  full_join(WQ_GTMNERR_long)

unique(WQ_all$variable)

# Save data
saveRDS(WQ_all, "03_Data_for_app/WQ_all.Rds")
Binary file modified 03_Data_for_app/WIN.Rds
Binary file not shown.
Binary file modified 03_Data_for_app/WQ.Rds
Binary file not shown.
Binary file added 03_Data_for_app/WQ_all.Rds
Binary file not shown.
Binary file modified 03_Data_for_app/WQ_locations.Rds
Binary file not shown.
111 changes: 111 additions & 0 deletions 03_Data_for_app/WQ_lookup_names.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
data_source,original_name,dashboard_name
WIN,Organization ID,OrganizationID
WIN,Monitoring Location ID,MonitoringLocationID
WIN,HUC12 Name,HUC12Name
WIN,Project ID,ProjectID
WIN,Activity ID,ActivityID
WIN,Activity Type,SampleType
WIN,Activity Start Date Time,SampleDate
WIN,Activity End Date Time,ActivityEndDateTime
WIN,Activity Time Zone,ActivityTimeZone
WIN,Media,Media
WIN,Matrix,Matrix
WIN,Sample Collection Type,SampleCollectionType
WIN,Sampling Agency Name,SamplingAgencyName
WIN,SampleCollectionEquipmentName,SampleCollectionEquipmentName
WIN,Activity Depth,ActivityDepth
WIN,Activity Depth Unit,ActivityDepthUnit
WIN,Relative Depth,RelativeDepth
WIN,Activity Top Depth,ActivityTopDepth
WIN,Activity Bottom Depth,ActivityBottomDepth
WIN,Activity Depth Top Bottom Unit,ActivityDepthTopBottomUnit
WIN,Total Depth,TotalDepth
WIN,Total Depth Unit,TotalDepthUnit
WIN,Activity Representative Ind,ActivityRepresentativeInd
WIN,Activity Comments,ActivityComments
WIN,Sampler Name,SamplerName
WIN,Field Blank Batch ID,FieldBlankBatchID
WIN,Equipment Blank Batch ID,EquipmentBlankBatchID
WIN,Trip Blank Batch ID,TripBlankBatchID
WIN,Master Activity ID,MasterActivityID
WIN,Activity Loaded Date,ActivityLoadedDate
WIN,Activity Updated Date,ActivityUpdatedDate
WIN,DEP Result ID,DEPResultID
WIN,Org Result ID,OrgResultID
WIN,DEP Analyte Group,DEPAnalyteGroup
WIN,DEP Analyte Name,ComponentLong
WIN,ADaPT Analyte ID,ADaPTAnalyteID
WIN,Org Analyte Name,OrgAnalyteName
WIN,Org Result Value,OrgResultValue
WIN,Org Result Unit,OrgResultUnit
WIN,Org MDL,OrgMDL
WIN,Org PQL,OrgPQL
WIN,Org Detection Unit,OrgDetectionUnit
WIN,DEP Result Value Number,Result
WIN,DEP Result Value Text,DEPResultValueText
WIN,DEP Result Unit,Unit
WIN,DEP MDL,DEPMDL
WIN,DEP PQL,DEPPQL
WIN,Value Qualifier,ValueQualifier
WIN,Sample Fraction,SampleFraction
WIN,Prep Method,PrepMethod
WIN,Preparation Date Time,PreparationDateTime
WIN,Preparation Time Zone,PreparationTimeZone
WIN,Analysis Method,AnalysisMethod
WIN,Analysis Date Time,AnalysisDateTime
WIN,Analysis Time Zone,AnalysisTimeZone
WIN,Lab ID,LabID
WIN,Lab Name,LabName
WIN,Lab Accreditation Authority,LabAccreditationAuthority
WIN,Result Comments,ResultComments
WIN,Lab Sample ID,LabSampleID
WIN,Method Batch ID,MethodBatchID
WIN,Analytical Batch ID,AnalyticalBatchID
WIN,Dilution,Dilution
WIN,Percent Moisture,PercentMoisture
WIN,Percent Recovery,PercentRecovery
WIN,Relative Percent Difference,RelativePercentDifference
WIN,Error,Error
WIN,Target Species,TargetSpecies
WIN,Finfish Size,FinfishSize
WIN,Finfish Unit,FinfishUnit
WIN,Result Value Type,ResultValueType
WIN,Statistical Base,StatisticalBase
WIN,Audit Censored Decisions,AuditCensoredDecisions
WIN,Result Loaded Date,ResultLoadedDate
WIN,Result Updated Date,ResultUpdatedDate
WIN,RowID,RowID
WIN,LocationID,LocationID
WIN,ProgramID,ProgramID
WIN,Station ID,StationID
WIN,Station Name,StationName
WIN,Station Type,StationType
WIN,County,County
WIN,Start Date,StartDate
WIN,End Date,EndDate
WIN,Location_1,Latitude
WIN,Location_2,Longitude
GTMNERR,UNID,UNID
GTMNERR,StationCode,StationCode
GTMNERR,SampleDate,SampleDate
GTMNERR,SampleType,SampleType
GTMNERR,Lat,Latitude
GTMNERR,Long,Longitude
GTMNERR,ComponentShort,ComponentShort
GTMNERR,ComponentLong,ComponentLong
GTMNERR,Result,Result
GTMNERR,Remark,Remark
GTMNERR,Flag,Flag
GTMNERR,Unit,Unit
GTMNERR,MRL,MRL
GTMNERR,MDL,MDL
GTMNERR,Dilution,Dilution
GTMNERR,PQL,PQL
GTMNERR,DateRecieved,DateRecieved
GTMNERR,DateAnalyzed,DateAnalyzed
GTMNERR,TestComments,TestComments
GTMNERR,AnalysisMethod,AnalysisMethod
GTMNERR,Lab ID,LabID
GTMNERR,Lab Accredidation Authority,LabAccredidationAuthority
GTMNERR,Laboratory,Laboratory
GTMNERR,RQ#,RQ#
16 changes: 16 additions & 0 deletions 03_Data_for_app/lookup_creation_manual.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Guana Dashboard development
13 August 2024

For the water quality page, to streamline showing all the WQ data on one page, I manually created
a lookup table to change the headers of the separate files so they are the same. This way we can
merge the datasets and use the existing functions on them.

The headers come from the files App_dev/01_Data_raw/Water_Quality/WIN/WIN_data_merged_20240501.csv and
App_dev/01_Data_raw/Water_Quality/Guana_WQ/Guana_masterdata.xlsx.

1. Copy-pasted the header names to App_dev/03_Data_for_app/WQ_lookup_names.csv and named the column original_name
2. Added a column data_source with either WIN or GTMNERR
3. Created a column dashboard_name and copied all the names from original_name to it
4. For now (08/13/2024) I am only changing latitude, longitude, dates, sample type, components/analytes measured (DEP, not org). For visualization purposes. We can rename the rest later (analytes themselves will
also need to be renamed, as well as info on station names and types).
5. For the new names I also removed the spaces (because R's read.csv() replaces spaces in column names with periods).
23 changes: 14 additions & 9 deletions 04_Tests/leaflet_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ GTMNERR <- st_transform(GTMNERR, crs = 4326)

# Leaflet uses EPSG 3857, change this
# https://gis.stackexchange.com/questions/48949/epsg-3857-or-4326-for-web-mapping
epsg4326 <- leafletCRS(crsClass = "L.CRS.EPSG4326")
#epsg4326 <- leafletCRS(crsClass = "L.CRS.EPSG4326")
# BUT! It actually seems that Leaflet does still want shapefiles in 4326, see
# https://github.com/Leaflet/Leaflet/issues/4146 I also tried to change GTMNERR
# to crs 3857 but that threw errors. See second paragraph here:
Expand All @@ -27,11 +27,11 @@ m <- leaflet(data = GTMNERR,
options = leafletOptions(#crs = epsg4326,
minZoom = 9,
maxZoom = 18)) %>%
#setView(lng=-81.347388, lat=30.075, zoom = 11) %>%
setView(lng=-81.289, lat=29.905, zoom = 11) %>%
clearBounds() %>%
addTiles() %>% # Add default OpenStreetMap map tiles
addPolygons(color = "purple", fill = NA) #%>%
#addMarkers(lng=-81.347388, lat=30.075, popup="Guana")
addPolygons(color = "purple", fill = NA) %>%
addMarkers(lng=-81.347388, lat=30.075, popup="Guana")
m

counties_select <- st_read("03_Data_for_app/shapefiles_new/counties_GTMNERR.shp")
Expand All @@ -43,7 +43,7 @@ HAB_data_locations <- HAB_data %>%
distinct() %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326)

leaflet(options = leafletOptions(crs = epsg4326, minZoom = 9, maxZoom = 18)) %>%
leaflet(options = leafletOptions(minZoom = 9, maxZoom = 18)) %>%
setView(lng=-81.347388, lat=30.075, zoom = 11) %>%
# Base map
addTiles() %>% # Add default OpenStreetMap map tiles
Expand All @@ -56,10 +56,15 @@ leaflet(options = leafletOptions(crs = epsg4326, minZoom = 9, maxZoom = 18)) %>%
highlightOptions = highlightOptions(color = "white", weight = 2,
bringToFront = TRUE),
group = "Counties", popup = ~NAME) %>%
addMarkers(data = HAB_data_locations,
popup = ~paste("Site: ", Site, "<br>",
"County: ", County),
group = "HAB") %>%
addAwesomeMarkers(icon = makeAwesomeIcon(icon = "flask", markerColor = "blue", library = "fa",
iconColor = "black"), #"",
data = HAB_data_locations,
#lng= rep(-81.347388, 34), lat = rep(30.075,34)
) %>%
# addMarkers(data = HAB_data_locations,
# popup = ~paste("Site: ", Site, "<br>",
# "County: ", County),
# group = "HAB") %>%
# # Layers control (turning layers on and off)
addLayersControl(overlayGroups = c("Counties", "GTMNERR boundaries", "HAB"),
options = layersControlOptions(collapsed = FALSE))
Expand Down
Loading

0 comments on commit 2da7f6d

Please sign in to comment.