Skip to content

Latest commit

 

History

History
137 lines (79 loc) · 4.4 KB

Blair_BLI.md

File metadata and controls

137 lines (79 loc) · 4.4 KB
library(stringr)
library(dplyr)
library(ggplot2)
library(reshape2)
library(readxl)


# Directory holding the raw sensor export files and the key file.
folder <- "Blair_data"

# Pick up only the sensor exports: a letter A-H, one digit, then a literal
# ".xls" at the end of the file name. The dot is escaped so it cannot match an
# arbitrary character.
files <- list.files(folder, pattern = "[A-H]\\d\\.xls$")

# file.path() inserts a separator that works on every platform (a hard-coded
# backslash only works on Windows) and returns an empty vector, rather than a
# bogus path, when no files were found.
filesPaths <- file.path(folder, files)

# The files are read as tab-delimited text: the first 4 lines are skipped, the
# next line is used as the header, and short rows are padded (fill = TRUE).
datList <- lapply(
  filesPaths,
  read.table,
  sep = "\t", skip = 4, fill = TRUE, header = TRUE
)

# Name each data frame after its file without the extension (e.g. "A1").
names(datList) <- tools::file_path_sans_ext(files)




# Collapse the list of per-file data frames into a single data frame. The list
# names end up in a "Sensor.Location" column identifying which file each row
# came from. Only the first three columns are kept, and the second and third
# are renamed to "Time" and "Shift".
dat <- datList %>%
  bind_rows(.id = "Sensor.Location") %>%
  select(1:3) %>%
  rename(Time = 2, Shift = 3)

# Sanity check: should have 8 letters x 7 reps/letter x 3000 entries = 168000 rows.
# Read in the key explaining which Env and Ab were combined to get each readout
# in the raw data. The path is built from `folder` so the key is always read
# from the same directory as the sensor files.
#
# We only care about some of the columns:
# - Sensor.Location corresponds to the column of the same name in the combined
#   raw data. It is an identifier for an Env/Ab combination.
# - Sample.ID is the Env.
# - Loading.Sample.ID is the Ab.
key <- read.csv(file.path(folder, "key.csv")) %>%
  select(Sensor.Location, Sample.ID, Loading.Sample.ID)

# Join the key onto the raw data by the Ab/Env combination identifier
# ("Sensor.Location"), then order the rows by sensor location and time.
#
# For unknown reasons, the Time column for the control data has times rounded
# to 1 decimal place while the experimental data rounds to 2. Everything is
# rounded to 1 decimal place here so that the times match.
#
# The times should start at zero rather than at 300-whatever, so AdjTime is the
# (rounded) time minus the lowest value in the Time column.
keyAndDat <- key %>%
  merge(dat, by = "Sensor.Location") %>%
  arrange(Sensor.Location, Time) %>%
  mutate(
    Time = round(Time, digits = 1),
    AdjTime = Time - min(Time)
  )
# Rows whose Loading.Sample.ID contains "control" go into their own data frame.
control <- filter(keyAndDat, str_detect(Loading.Sample.ID, "control"))

# Everything else is the experimental data.
keyAndDatNoControl <- filter(
  keyAndDat,
  str_detect(Loading.Sample.ID, "control", negate = TRUE)
)
# Goal: subtract the background control shift values from the corresponding
# experimental shift values.
#
# Corresponding control and experimental data share the same letter in the
# "Sensor.Location" column and the same time in the Time column.
#
# Example: for any row of experimental data with "A" in Sensor.Location,
# subtract the shift value of the control data that has "A" in Sensor.Location
# and a matching time. There is only one set of control data with "A".
#
# Plan:
# 1. Add a column to both the control and keyAndDatNoControl data frames
#    holding the letter part of Sensor.Location. Call it "SensorLocLetter".
# 2. Merge the two data frames on SensorLocLetter AND Time, so that each
#    experimental data point and its control data point share a row.
# 3. Add a column with the difference between the experimental shift and the
#    control shift.

# Step 1: pull out the first capital letter of Sensor.Location in both tables.
control$SensorLocLetter <- str_extract(control$Sensor.Location, "[A-Z]")

keyAndDatNoControl$SensorLocLetter <- str_extract(
  keyAndDatNoControl$Sensor.Location,
  "[A-Z]"
)


# Merge the experimental and control data on SensorLocLetter and Time.
# all = TRUE keeps unmatched rows from both sides, filled with NA.
#
# Columns present in both tables get the suffix ".Exp" (experimental) or
# ".Cntl" (control) directly from merge(). This replaces a regex rename of
# ".x" / ".y", where the unescaped dot could match any character and the
# unanchored pattern could hit an x or y in the middle of a column name.
#
# CorrectedShift is the experimental shift minus the control shift; it is NA
# wherever either side had no matching row.
mergeExpAndCntl <- merge(
  keyAndDatNoControl, control,
  by = c("SensorLocLetter", "Time"),
  all = TRUE,
  suffixes = c(".Exp", ".Cntl")
) %>%
  mutate(CorrectedShift = Shift.Exp - Shift.Cntl)




# Plotting
# The plots Blair made in Prism show time vs shift, with a different colored
# line for each Env ("Sample ID") and a separate plot for each Ab
# ("Loading Sample ID"). Here each Ab gets its own facet with a free y axis.
ggplot(
  mergeExpAndCntl,
  aes(
    x = AdjTime.Exp,
    y = CorrectedShift,
    group = Sample.ID.Exp,
    color = Sample.ID.Exp
  )
) +
  geom_line() +
  facet_wrap(vars(Loading.Sample.ID.Exp), scales = "free_y") +
  xlab("Time") +
  ylab("Background Corrected Shift") +
  scale_color_discrete(name = "Envelope")