library(stringr)
library(dplyr)
library(ggplot2)
library(reshape2)
library(readxl)
# Load the raw sensor read-outs ----
# Each sensor has its own tab-delimited export (despite the .xls extension)
# named <letter A-H><digit>.xls inside `folder`.
folder <- "Blair_data"
# Escape the dot and anchor the end so only real ".xls" names are picked up
# (an unescaped "." matches any character).
files <- list.files(folder, pattern = "[A-H]\\d\\.xls$")
if (length(files) == 0) {
  stop(
    "No sensor files matching [A-H]<digit>.xls found in '", folder, "'",
    call. = FALSE
  )
}
# file.path() uses a separator that works on every platform; a hard-coded
# backslash only resolves on Windows.
filesPaths <- file.path(folder, files)
# skip = 4 jumps over the lines that precede the column header row;
# fill = TRUE pads short rows with NA instead of failing.
datList <- lapply(
  filesPaths, read.table,
  sep = "\t", skip = 4, fill = TRUE, header = TRUE
)
# Name each element after its file (minus the extension) so the name can later
# serve as the sensor identifier.
names(datList) <- tools::file_path_sans_ext(files)
# Stack the per-file data frames into one; the list names become a
# "Sensor.Location" column identifying which file each row came from.
# Only the first three columns are kept (the identifier plus the first two
# columns read from the file).
dat <- bind_rows(datList, .id = "Sensor.Location") %>%
  select(1:3)
# Give the two data columns meaningful names.
colnames(dat)[2:3] <- c("Time", "Shift")
# Sanity check: should have 8 letters x 7 reps/letter x 3000 entries = 168000 rows.
# Read the key that explains which Env and Ab were combined to produce each
# read-out in the raw data, keeping only the columns we need:
#   Sensor.Location   - matches the column of the same name in the raw data;
#                       an identifier for an Env/Ab combination
#   Sample.ID         - the Env
#   Loading.Sample.ID - the Ab
key <- read.csv("Blair_data/key.csv")
key <- select(key, Sensor.Location, Sample.ID, Loading.Sample.ID)
# Attach the key to the raw data via the Ab/Env combination identifier
# ("Sensor.Location"), then order the rows by sensor and time.
#
# Time: for unknown reasons the control data has times rounded to 1 decimal
# place while the experimental data rounds to 2, so everything is rounded to
# 1 decimal place here to make them match.
#
# AdjTime: the raw times start at 300-something rather than zero, so the
# smallest (rounded) Time is subtracted from every row to get a clock that
# starts at zero.
keyAndDat <- merge(key, dat, by = "Sensor.Location") %>%
  arrange(Sensor.Location, Time) %>%
  mutate(
    Time = round(Time, digits = 1),
    AdjTime = Time - min(Time)
  )
# Split control from experimental data ----
# Goal: subtract the background control shift from the corresponding
# experimental shift. Corresponding control and experimental rows share the
# same letter in "Sensor.Location" and the same value in "Time".
# Example: for any experimental row with "A" in Sensor.Location, subtract the
# shift of the control row that also has "A" and a matching time (per the
# original author's note, each letter has a single control).
#
# Plan:
#   1. Tag both data frames with the letter part of Sensor.Location
#      ("SensorLocLetter").
#   2. Merge them on SensorLocLetter AND Time so each experimental point sits
#      in the same row as its control point.
#   3. Take the difference of the experimental and control shift columns.

# Control rows: Loading.Sample.ID mentions "control".
control <- keyAndDat %>%
  filter(str_detect(Loading.Sample.ID, "control")) %>%
  mutate(SensorLocLetter = str_extract(Sensor.Location, "[A-Z]"))
# Experimental rows: everything whose Loading.Sample.ID does not mention it.
keyAndDatNoControl <- keyAndDat %>%
  filter(!str_detect(Loading.Sample.ID, "control")) %>%
  mutate(SensorLocLetter = str_extract(Sensor.Location, "[A-Z]"))
# Pair each experimental reading with its background control ----
# Rows match when they share the sensor letter (SensorLocLetter) and Time.
# all.x / all.y = TRUE keep unmatched rows from either side, filled with NA.
# `suffixes` labels the overlapping columns directly: ".Exp" for the
# experimental side, ".Cntl" for the control side. This replaces renaming the
# default ".x"/".y" afterwards with an unescaped regex, where "." matches any
# character and could rewrite a column that merely contains an "x" or "y".
mergeExpAndCntl <- merge(
  keyAndDatNoControl, control,
  by = c("SensorLocLetter", "Time"),
  all.x = TRUE, all.y = TRUE,
  suffixes = c(".Exp", ".Cntl")
)
# Background correction: experimental shift minus the matching control shift.
# Rows without a partner on either side get NA.
mergeExpAndCntl <- mergeExpAndCntl %>%
  mutate(CorrectedShift = Shift.Exp - Shift.Cntl)
# Plotting ----
# Mirrors the plots Blair made in Prism: time vs. shift, one coloured line per
# Env ("Sample ID"), and a separate panel for each Ab ("Loading Sample ID").
ggplot(
  data = mergeExpAndCntl,
  mapping = aes(x = AdjTime.Exp, y = CorrectedShift)
) +
  geom_line(mapping = aes(colour = Sample.ID.Exp, group = Sample.ID.Exp)) +
  facet_wrap(facets = ~Loading.Sample.ID.Exp, scales = "free_y") +
  xlab("Time") +
  ylab("Background Corrected Shift") +
  scale_colour_discrete(name = "Envelope")