-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcoral_richness.R
111 lines (74 loc) · 3.45 KB
/
coral_richness.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# calculating richness of coral transects
library(vegan)
library(ggplot2)
#' Count distinct VME morphospecies in one transect
#'
#' Keeps the rows whose `label_hierarchy` mentions one of the indicator groups
#' (Cnidaria, Porifera, Bryozoa, Stalked crinoids) and counts the distinct
#' `label_name` values among those rows.
#'
#' @param df A data frame with `label_hierarchy` and `label_name` columns.
#' @return A single integer: the number of distinct, non-missing `label_name`
#'   values among the matching rows (0 when nothing matches).
calculate_species_richness <- function(df) {
  vme_pattern <- "Cnidaria|Porifera|Bryozoa|Stalked crinoids"

  # grepl() yields FALSE for NA hierarchies, so unlabelled rows are excluded.
  is_vme <- grepl(vme_pattern, df$label_hierarchy)
  vme_names <- df$label_name[is_vme]

  # unique() treats NA as a value of its own; a missing label is not a
  # morphospecies, so drop NAs before counting.
  length(unique(vme_names[!is.na(vme_names)]))
}
#' Summarise the environmental readings of one transect
#'
#' @param df A data frame holding one transect. The columns `CTD.Temperature`,
#'   `CTD.Pressure`, `CTD.Salinity`, `depth`, `slope` and `substrate` are read.
#' @return A one-row data frame with the NA-ignoring mean of each numeric
#'   column plus `MostCommonSubstrate`, which is "Unknown" when no non-missing
#'   substrate value exists.
calculate_environmental_averages <- function(df) {
  # Mean that skips missing readings
  na_mean <- function(values) {
    mean(values, na.rm = TRUE)
  }

  # Most frequent value; table() ignores NA, and which.max() resolves ties in
  # favour of the first entry in table order.
  most_frequent <- function(values) {
    tally <- table(values)
    if (length(tally) == 0) {
      return("Unknown")
    }
    names(tally)[which.max(tally)]
  }

  data.frame(
    AverageTemperature = na_mean(df$CTD.Temperature),
    AveragePressure = na_mean(df$CTD.Pressure),
    AverageSalinity = na_mean(df$CTD.Salinity),
    AverageDepth = na_mean(df$depth),
    AverageGradient = na_mean(df$slope),
    MostCommonSubstrate = most_frequent(df$substrate)
  )
}
#' Build the one-row summary for a single transect file
#'
#' Reads the file with `read.csv()` and combines the file's base name, its
#' species richness and its environmental averages into one row.
#'
#' @param file_path Path to a transect CSV file.
#' @return A one-row data frame: `Transect` (file name without directory),
#'   `Richness`, followed by the columns returned by
#'   `calculate_environmental_averages()`.
process_transect <- function(file_path) {
  transect_df <- read.csv(file_path)

  cbind(
    Transect = basename(file_path), # file name identifies the transect
    Richness = calculate_species_richness(transect_df),
    calculate_environmental_averages(transect_df)
  )
}
# Build the per-transect summary table ----

# Directory containing transect files
transect_directory <- "/Users/user/Desktop/metadata_flow/coral_50m_transects" # Update with your directory path
transect_files <- list.files(transect_directory, full.names = TRUE)

# Fail early with a clear message instead of an obscure error further down
# (do.call(rbind, list()) would return NULL).
if (length(transect_files) == 0) {
  stop("No transect files found in ", transect_directory, call. = FALSE)
}

# One summary row per file, stacked in list.files() order
transect_data <- lapply(transect_files, process_transect)
coral_transect_summary <- do.call(rbind, transect_data)
print(coral_transect_summary)
class(coral_transect_summary)

# add a fishing impact column: "N" for every transect except the flagged one
coral_transect_summary$FishingImpact <- rep("N", nrow(coral_transect_summary))
coral_transect_summary$FishingImpact[coral_transect_summary$Transect == "dive4transect1.csv"] <- "Y"
print(coral_transect_summary)

# removing transects that contain no data
# NOTE(review): these are row positions in list.files() order, so they must be
# re-checked whenever the contents of the transect directory change.
coral_transect_summary <- coral_transect_summary[-c(27, 74, 75, 79), ]

# export dataframe to a csv
write.csv(coral_transect_summary, "/Users/user/Desktop/metadata_flow/SEAMOUNT_TRANSECTS/coral_transect_summary.csv", row.names = FALSE)

# Exploratory plots ----

ggplot(coral_transect_summary, aes(x = AverageDepth, y = Richness)) +
  geom_point() +
  theme_minimal() +
  labs(
    title = "Species Richness vs Depth",
    x = "Average Depth (m)",
    y = "Species Richness"
  )

# NOTE(review): the axis label gives the gradient in metres; confirm the unit
# of the `slope` column before publishing this figure.
ggplot(coral_transect_summary, aes(x = AverageGradient, y = Richness)) +
  geom_point() +
  theme_minimal() +
  labs(
    title = "Species Richness vs Gradient",
    x = "Average Gradient (m)",
    y = "Species Richness"
  )

# check for normality
# The Q-Q points and the reference line must come from the same variable;
# Richness is the one tested by shapiro.test() below.
qqnorm(coral_transect_summary$Richness)
qqline(coral_transect_summary$Richness)
shapiro.test(coral_transect_summary$Richness)

coral_transect_summary