# 02_clean_Geo_traitData.R
# libraries ---------------------------------------------------------------
library(tidyverse)
# data --------------------------------------------------------------------
# Growth form table. Taken from:
# Engemann, K., Sandel, B., Boyle, B., Enquist, B. J., Jørgensen, P. M.,
# Kattge, J., McGill, B. J., Morueta-Holme, N., Peet, R. K., Spencer, N. J.,
# Violle, C., Wiser, S. K. and Svenning, J.-C. (2016),
# A plant growth form dataset for the New World. Ecology, 97: 3243.
# doi:10.1002/ecy.1569
GrowForm <- read.table(
  file = "./data/base/GrowthForm_Final.txt",
  header = TRUE
)

# Trait records. Traits of interest, with the units given in the
# original notes:
#   - maximum plant height (m)
#   - SLA (cm2/g)
#   - seed mass (mg)
#   - leaf phosphorus and leaf nitrogen concentration per mass
#     (Leaf N and Leaf P) (mg/g)
#   - wood density (mg/cm3)
Traits_BIEN <- read.csv(file = "./data/base/2018_02_07_BIEN_trait_data.csv")
# Filter traits -----------------------------------------------------------
## Keep only the records of the six main traits, and only the columns
## that the rest of the script works with
fun_traits <- c(
  "whole plant leaf area per whole plant leaf dry mass",
  "seed mass",
  "whole plant height",
  "stem wood density",
  "leaf nitrogen content per leaf dry mass",
  "leaf phosphorus content per leaf dry mass"
)

## droplevels() discards factor levels that no longer occur after filtering
Traits_BIEN_sub <-
  Traits_BIEN %>%
  filter(trait_name %in% fun_traits) %>%
  dplyr::select(scrubbed_species_binomial, trait_name, trait_value, unit) %>%
  droplevels()
# Filter non numerical trait values -----------------------------------------
## Remove placeholder entries: trait values recorded as ".", "*" or "0"
Traits_BIEN_sub <-
  Traits_BIEN_sub %>%
  filter(
    # rows with a missing trait_value are removed as well
    !is.na(trait_value),
    !(trait_value %in% c(".", "*", "0"))
  ) %>%
  droplevels()
## Exploring the number of traits with values per species:
## one row per species, N_traits = how many different traits it has records for
species_coverage <-
  Traits_BIEN_sub %>%
  group_by(scrubbed_species_binomial) %>%
  summarise(N_traits = length(unique(trait_name)))
# Reshaping data frame ----------------------------------------------------
# Renaming trait factors: swap the long trait names for short labels.
# Names without a match are left untouched; the column ends up as a factor.
Traits_BIEN_sub <-
  Traits_BIEN_sub %>%
  mutate(
    trait_name = as.factor(
      dplyr::recode(
        as.character(trait_name),
        "stem wood density" = "Wood_density",
        "leaf nitrogen content per leaf dry mass" = "Leaf_N",
        "seed mass" = "Seed_mass",
        "whole plant height" = "Height",
        "leaf phosphorus content per leaf dry mass" = "Leaf_P",
        "whole plant leaf area per whole plant leaf dry mass" = "SLA"
      )
    )
  )
# Number of observations per trait values in each species
# For example, Abarema jupunba has 16 values of stem_wood_density
Traits_BIEN_sub %>%
  group_by(scrubbed_species_binomial, trait_name) %>%
  tally()

# Same but using the count function
Traits_BIEN_sub %>%
  count(scrubbed_species_binomial, trait_name)

## Double checking the units of the traits
## (N_sp is the number of records in each trait/unit combination,
## not the number of distinct species)
Traits_BIEN_sub %>%
  group_by(trait_name, unit) %>%
  summarise(N_sp = n())
# Change class from factor to numeric
# First pass: values that cannot be parsed as numbers become NA
Traits_BIEN_sub[["trait_value_NU"]] <-
  as.numeric(as.character(Traits_BIEN_sub[["trait_value"]]))

## NAs are produced in some fields with weird values, so they help us to
## flag observations with potential errors: show the raw values that failed
Traits_BIEN_sub[["trait_value"]][is.na(Traits_BIEN_sub[["trait_value_NU"]])]

## There is an observation that has a comma: strip commas, then convert again
Traits_BIEN_sub[["trait_value"]] <-
  gsub(",", "", Traits_BIEN_sub[["trait_value"]], fixed = TRUE)
Traits_BIEN_sub[["trait_value_NU"]] <-
  as.numeric(as.character(Traits_BIEN_sub[["trait_value"]]))
# Traits_BIEN_sub[["trait_value"]][is.na(Traits_BIEN_sub[["trait_value_NU"]])]
# Calculate main trait values per species ---------------------------------
# Only one trait value per species: the mean of all numeric records of each
# species/trait combination.
Mean_Traits_BIEN <-
  Traits_BIEN_sub %>%
  group_by(scrubbed_species_binomial, trait_name) %>%
  dplyr::summarise(
    # mean(..., na.rm = TRUE) returns NaN when every record of a combination
    # is NA; report such combinations as missing (NA) instead
    trait_value = if (all(is.na(trait_value_NU))) {
      NA_real_
    } else {
      mean(trait_value_NU, na.rm = TRUE)
    }
  ) %>%
  # summarise() only drops the last grouping level, so the result would stay
  # grouped by species; remove the grouping so later steps see a plain table
  ungroup()

var_names <- as.character(unique(Mean_Traits_BIEN$trait_name))

# Create trait values dataframe in a wide format: one row per species, one
# column per trait. pivot_wider() replaces the superseded spread();
# names_sort = TRUE orders the trait columns by factor level, which is the
# column order spread() produced.
trait_df_wide <-
  Mean_Traits_BIEN %>%
  pivot_wider(
    names_from = trait_name,
    values_from = trait_value,
    names_sort = TRUE
  )
# Include growth form -----------------------------------------------------
# Columns of the growth form table that get attached to the trait table
GrowForm_tmp <- GrowForm[
  ,
  c("FAMILY_STD", "SPECIES_STD", "GROWTHFORM_STD", "GROWTHFORM_DIV"),
  drop = FALSE
]

# Keep every species of the trait table (all.x = TRUE); species without a
# match in the growth form table get NA in the growth form columns
Trait_BIEN_df <- merge(
  trait_df_wide, GrowForm_tmp,
  by.x = "scrubbed_species_binomial",
  by.y = "SPECIES_STD",
  all.x = TRUE
)

# Include a new Growth form with a more general classification:
# unknown growth form -> NA, the forms listed in `woody` -> "woody",
# anything else -> "herbaceous"
woody <- c("Tree", "Liana", "Shrub", "Woody epiphyte")

Trait_BIEN_df <-
  Trait_BIEN_df %>%
  mutate(
    GROWTHFORM_GEN = case_when(
      is.na(GROWTHFORM_STD) ~ NA_character_,
      GROWTHFORM_STD %in% woody ~ "woody",
      TRUE ~ "herbaceous"
    )
  )

# Number of species in each general growth form
Trait_BIEN_df %>%
  group_by(GROWTHFORM_GEN) %>%
  summarise(sp_number = n_distinct(scrubbed_species_binomial))
# Write clean datasets ----------------------------------------------------
# row.names is left at its default, so the row names are written as well
write.csv(
  x = Trait_BIEN_df,
  file = "./data/processed/BIEN_trait_GrowthForm.csv"
)