HCR_Analysis_Final.Rmd

---
title: "HCR_analysis"
author: "Ryan Palaganas"
date: "2024-12-04"
output: html_document
---

```{r setup, include=FALSE}
# Default chunk option for the whole document: show each chunk's source code
# alongside its output in the knitted report.
knitr::opts_chunk$set(echo = TRUE)
```

```{r, message=FALSE, warning = FALSE}
library(dplyr)
library(ggplot2)
library(stringr)
library(ggplot2)
library(dplyr)
library(gridExtra)
library(ggpubr)
library(readr)
library(lmerTest)
```

### Read in csvs
The normalized count is generated by $normalized.count = total.intensity / median.spot.intensity$. Total intensity refers to the total spot intensity per ROI/neuron following DoG and radial symmetry spot detection. Median intensity is the median spot intensity per image, filtered by ROIs/neurons.

Gene count per pixel takes the normalized count and divides it by the ROI/neuron area in pixels, multiplied by 10000. 
```{r}
# ---- Gene pair 1: APLP2 / CALM1 ----
# Each gene table is read, ordered by ROI key, given a per-pixel count
# (normalized count / ROI area in pixels, scaled by 10000), and has its
# Normalized_Count column renamed with the gene prefix so the two gene tables
# can later be merged without a name clash.
CALM1 <- read.csv("/Users/ryanpalaganas/Desktop/Areas_of_Responsibility/20-29_Side_Projects/20_AD_TAU_TANGLES/ANALYSIS/max_projections/APLP2_CALM1/split_channels/CALM1/spots_filtered_case_gene/combined_roi_stats.csv")
CALM1 <- CALM1[order(CALM1$ROI_Key), ] %>%
  mutate(calm1ppixel = (Normalized_Count / Area_Pixels) * 10000) %>%
  rename(CALM1_normalized_count = Normalized_Count)

APLP2 <- read.csv("/Users/ryanpalaganas/Desktop/Areas_of_Responsibility/20-29_Side_Projects/20_AD_TAU_TANGLES/ANALYSIS/max_projections/APLP2_CALM1/split_channels/APLP2/spots_filtered_case_gene/combined_roi_stats.csv")
APLP2 <- APLP2[order(APLP2$ROI_Key), ] %>%
  mutate(aplp2ppixel = (Normalized_Count / Area_Pixels) * 10000) %>%
  rename(APLP2_normalized_count = Normalized_Count)

# Tau intensity table for the APLP2/CALM1 images, ordered by ROI key.
AC_tau <- read.csv("/Users/ryanpalaganas/Desktop/Areas_of_Responsibility/20-29_Side_Projects/20_AD_TAU_TANGLES/ANALYSIS/max_projections/APLP2_CALM1/split_channels/tau/intensity/tau_intensity_mmcalls_AC_1.csv")
AC_tau <- AC_tau[order(AC_tau$ROI_Key), ]

# ---- Gene pair 2: SYT1 / PRNP ----
# Same processing as above for the second gene pair.
PRNP <- read.csv("/Users/ryanpalaganas/Desktop/Areas_of_Responsibility/20-29_Side_Projects/20_AD_TAU_TANGLES/ANALYSIS/max_projections/SYT1_PRNP/split_channels/PRNP/spots_filtered_case_gene/combined_roi_stats.csv")
PRNP <- PRNP[order(PRNP$ROI_Key), ] %>%
  mutate(PRNPppixel = (Normalized_Count / Area_Pixels) * 10000) %>%
  rename(PRNP_normalized_count = Normalized_Count)

SYT1 <- read.csv("/Users/ryanpalaganas/Desktop/Areas_of_Responsibility/20-29_Side_Projects/20_AD_TAU_TANGLES/ANALYSIS/max_projections/SYT1_PRNP/split_channels/SYT1/spots_filtered_case_gene/combined_roi_stats.csv")
SYT1 <- SYT1[order(SYT1$ROI_Key), ] %>%
  mutate(SYT1ppixel = (Normalized_Count / Area_Pixels) * 10000) %>%
  rename(SYT1_normalized_count = Normalized_Count)

# Tau intensity table for the SYT1/PRNP images, ordered by ROI key.
PS_tau <- read.csv("/Users/ryanpalaganas/Desktop/Areas_of_Responsibility/20-29_Side_Projects/20_AD_TAU_TANGLES/ANALYSIS/max_projections/SYT1_PRNP/split_channels/tau/intensity/tau_intensity_mmcalls_SP.csv")
PS_tau <- PS_tau[order(PS_tau$ROI_Key), ]

```

### combine into a single dataframe 
This chunk simply merges each gene pair into a common dataframe and drops ROIs that were manually identified as 'non neuron'
```{r}
# ---- APLP2 / CALM1: merge gene tables with tau calls ----
# merge() suffixes columns that exist in both gene tables with ".x" (first
# table, CALM1) and ".y" (second table, APLP2). Only the columns listed below
# are carried forward.
AC_gene_cols <- c("ROI_Key", "Image_Number.x", "Area_Pixels.x",
                  "CALM1", "Total_Intensity.x", "Median_Intensity.x",
                  "CALM1_normalized_count", "calm1ppixel",
                  "APLP2", "Total_Intensity.y", "Median_Intensity.y",
                  "APLP2_normalized_count", "aplp2ppixel")
AC_tau_cols <- c("Mean_Tau", "Median_Tau", "Max_Tau", "Total_Tau",
                 "Mean_tau_perpixel", "Tau_Positive", "Drop_Cell")

AC_merge <- merge(CALM1, APLP2, by = "ROI_Key")
AC_merge <- AC_merge[, AC_gene_cols]
AC_merge <- merge(AC_merge, AC_tau, by = "ROI_Key")
AC_merge <- AC_merge[, c(AC_gene_cols, AC_tau_cols)]

# Replace the merge suffixes with descriptive names. The previous rename of
# "ROI_ID.x" was removed: that column is not part of the selection above, so
# the rename could never match anything.
AC_merge <- AC_merge %>%
  rename(
    Case_Number = Image_Number.x,
    Area_Pixels = Area_Pixels.x,
    CALM1_Total_Intensity = Total_Intensity.x,
    CALM1_Median_Intensity = Median_Intensity.x,
    APLP2_Total_Intensity = Total_Intensity.y,
    APLP2_Median_Intensity = Median_Intensity.y
  )

head(AC_merge, n = 3)

# Drop ROIs flagged with Drop_Cell == 1.
# Guard against the empty case: with no flagged rows, which() returns
# integer(0) and AC_merge[-integer(0), ] would select ZERO rows, silently
# discarding the entire table.
AC_drop_indx <- which(AC_merge$Drop_Cell == 1)
if (length(AC_drop_indx) > 0) {
  AC_merge <- AC_merge[-AC_drop_indx, ]
}


# ---- PRNP / SYT1: merge gene tables with tau calls ----
# merge() suffixes columns that exist in both gene tables with ".x" (first
# table, PRNP) and ".y" (second table, SYT1). Only the columns listed below
# are carried forward.
PS_gene_cols <- c("ROI_Key", "Image_Number.x", "Area_Pixels.x",
                  "PRNP", "Total_Intensity.x", "Median_Intensity.x",
                  "PRNP_normalized_count", "PRNPppixel",
                  "SYT1", "Total_Intensity.y", "Median_Intensity.y",
                  "SYT1_normalized_count", "SYT1ppixel")
PS_tau_cols <- c("Mean_Tau", "Median_Tau", "Max_Tau", "Total_Tau",
                 "Mean_tau_perpixel", "Tau_Positive", "Drop_Cell")

PS_merge <- merge(PRNP, SYT1, by = "ROI_Key")
PS_merge <- PS_merge[, PS_gene_cols]
PS_merge <- merge(PS_merge, PS_tau, by = "ROI_Key")
PS_merge <- PS_merge[, c(PS_gene_cols, PS_tau_cols)]

# Replace the merge suffixes with descriptive names. The previous rename of
# "ROI_ID.x" was removed: that column is not part of the selection above, so
# the rename could never match anything.
PS_merge <- PS_merge %>%
  rename(
    Case_Number = Image_Number.x,
    Area_Pixels = Area_Pixels.x,
    PRNP_Total_Intensity = Total_Intensity.x,
    PRNP_Median_Intensity = Median_Intensity.x,
    SYT1_Total_Intensity = Total_Intensity.y,
    SYT1_Median_Intensity = Median_Intensity.y
  )

head(PS_merge, n = 3)

# Drop ROIs flagged with Drop_Cell == 1.
# Guard against the empty case: with no flagged rows, which() returns
# integer(0) and PS_merge[-integer(0), ] would select ZERO rows, silently
# discarding the entire table.
PS_drop_indx <- which(PS_merge$Drop_Cell == 1)
if (length(PS_drop_indx) > 0) {
  PS_merge <- PS_merge[-PS_drop_indx, ]
}

```

### create image IDs for tau thresholding, add in CERAD, Braak, Thal metadata
Adds in case specific metadata to each gene-pair dataframe. Also extracts an image ID to aggregate ROIs by image. 
```{r}
# Case-level metadata lookup: one row per case, in the group order
# PART (6 cases), AD-sparse (3), AD-moderate (4), AD-frequent (5).
# CERAD mixes 0 with letters, so the whole column is stored as text ("0").
disease_mapping <- data.frame(
  Case_Number = c(
    2313, 2317, 2371, 2522, 2533, 2751,  # PART
    2052, 2146, 2763,                    # AD-sparse
    2192, 2247, 2303, 2835,              # AD-moderate
    2321, 2796, 2841, 2862, 2242         # AD-frequent
  ),
  CERAD = c(rep(0, 6), rep("A", 3), rep("B", 4), rep("C", 5)),
  Braak = c(
    4, 2, 3, 2, 2, 2,  # PART
    2, 4, 4,           # AD-sparse
    4, 4, 4, 4,        # AD-moderate
    6, 5, 5, 6, 5      # AD-frequent
  ),
  Thal = c(
    1, 1, 1, 0, 0, 0,  # PART
    1, 3, 2,           # AD-sparse
    5, 5, 4, 4,        # AD-moderate
    5, 4, 5, 5, 5      # AD-frequent
  ),
  Disease = rep(c("PART", "AD"), times = c(6, 12)),
  Disease_Severity = rep(
    c("PART", "AD-sparse", "AD-moderate", "AD-frequent"),
    times = c(6, 3, 4, 5)
  )
)

# For each gene-pair table:
#   1. Image_ID = the leading part of ROI_Key matched by the pattern below
#      (two underscore-delimited fields, then "_orig" and any trailing digits);
#      it is used to aggregate ROIs by image for tau thresholding.
#   2. Case-level metadata (CERAD, Braak, Thal, Disease, Disease_Severity) is
#      attached by matching Case_Number against disease_mapping. Cases absent
#      from the mapping keep their rows and get NA metadata (left join).
AC_merge <- AC_merge %>%
  mutate(Image_ID = str_extract(ROI_Key, "^[^_]*_[^_]*_orig\\d*")) %>%
  left_join(disease_mapping, by = "Case_Number")

PS_merge <- PS_merge %>%
  mutate(Image_ID = str_extract(ROI_Key, "^[^_]*_[^_]*_orig\\d*")) %>%
  left_join(disease_mapping, by = "Case_Number")

```

# Quantification
For the following chunks, we follow a similar scheme. First, we drop images with no tau-positive cells.
We also include a Q-Q plot and a Shapiro-Wilk test for normality of the log-transformed per-pixel counts. None of the genes follows a normal distribution.
Next, we compare gene expression between tau-positive and tau-negative cells.
We fit a linear mixed-effects model for each gene in which the log-transformed gene count per pixel is the response variable, tau status is a fixed effect, and image ID is a random effect. Disease severity is an ordered categorical variable of amyloid severity: PART < AD-sparse < AD-moderate < AD-frequent.
Finally, we use a linear mixed-effects model with a tau-by-severity interaction to assess whether the effect of tau burden on gene expression depends on disease severity.

## APLP2, CALM1 
```{r}
# Case and image identifiers become plain factors; disease severity becomes an
# ORDERED factor with PART as the lowest level.
# NOTE(review): an ordered factor is not the same as releveling PART to be a
# reference level. Under R's default contrast settings, ordered factors get
# polynomial contrasts in model formulas, not comparisons against PART --
# confirm this is the intended parameterisation.
AC_merge <- AC_merge %>%
  mutate(
    Case_Number = factor(Case_Number),
    Image_ID = factor(Image_ID),
    Disease_Severity = factor(
      Disease_Severity,
      levels = c("PART", "AD-sparse", "AD-moderate", "AD-frequent"),
      ordered = TRUE
    )
  )

# Per-image cell counts: tau-positive, tau-negative, and total.
# sum(Tau_Positive) is only a count if Tau_Positive is coded 0/1 -- TODO confirm.
AC_zero_tau_images <- summarize(
  group_by(AC_merge, Image_ID),
  Tau_Pos_Count = sum(Tau_Positive),
  Tau_Neg_Count = sum(Tau_Positive == 0),
  Total_Cells = n()
)

# Images without a single tau-positive cell are reported and then excluded.
AC_zero_tau_list <- with(AC_zero_tau_images, Image_ID[which(Tau_Pos_Count == 0)])
cat("Dropping images...", paste0(AC_zero_tau_list))
AC_merge <- filter(AC_merge, !Image_ID %in% AC_zero_tau_list)

# Images remaining after filtering, with a position index for each.
AC_image_ids <- unique(AC_merge$Image_ID)
AC_image_lookup <- data.frame(
  Image_ID = AC_image_ids,
  Index = seq_along(AC_image_ids)
)

# Preallocate: one fold-change value per ROI, one mean per image.
APLP2_relative_intensities <- numeric(nrow(AC_merge))
CALM1_relative_intensities <- numeric(nrow(AC_merge))
APLP2_tau_neg_means <- numeric(length(AC_image_ids))
CALM1_tau_neg_means <- numeric(length(AC_image_ids))
APLP2_tau_pos_means <- numeric(length(AC_image_ids))
CALM1_tau_pos_means <- numeric(length(AC_image_ids))

# Walk the images by index. Within each image the per-pixel counts are
# averaged separately over tau-negative and tau-positive ROIs, and every ROI's
# per-pixel count is divided by that image's tau-negative mean.
# An image with no tau-negative ROIs yields a NaN baseline (mean of an empty
# vector), so its fold changes are NaN as well.
for (idx in AC_image_lookup$Index) {
  id <- as.character(AC_image_lookup$Image_ID[idx])
  in_image <- AC_merge$Image_ID == id
  image_data <- AC_merge[in_image, ]

  is_tau_neg <- image_data$Tau_Positive == 0
  is_tau_pos <- image_data$Tau_Positive == 1

  APLP2_tau_neg_means[idx] <- mean(image_data$aplp2ppixel[is_tau_neg])
  CALM1_tau_neg_means[idx] <- mean(image_data$calm1ppixel[is_tau_neg])
  APLP2_tau_pos_means[idx] <- mean(image_data$aplp2ppixel[is_tau_pos])
  CALM1_tau_pos_means[idx] <- mean(image_data$calm1ppixel[is_tau_pos])

  APLP2_relative_intensities[in_image] <- image_data$aplp2ppixel / APLP2_tau_neg_means[idx]
  CALM1_relative_intensities[in_image] <- image_data$calm1ppixel / CALM1_tau_neg_means[idx]
}

# Attach the fold changes to the ROI table.
AC_merge$APLP2_Relative_Fold_Change <- APLP2_relative_intensities
AC_merge$CALM1_Relative_Fold_Change <- CALM1_relative_intensities

# Shapiro-Wilk p-value of the log1p-transformed per-pixel counts, computed
# separately within each disease-severity group.
summarise(
  group_by(AC_merge, Disease_Severity),
  CALM1_shapiro_p = shapiro.test(log1p(calm1ppixel))$p.value,
  APLP2_shapiro_p = shapiro.test(log1p(aplp2ppixel))$p.value
)

# Q-Q plots of the same transformed values, one panel per severity group.
# The layers shared by both plots are collected once and added as a list.
AC_qq_layers <- list(
  geom_qq(),
  geom_qq_line(),
  facet_wrap(~Disease_Severity)
)

ggplot(AC_merge, aes(sample = log1p(calm1ppixel))) +
  AC_qq_layers +
  labs(title = "CALM1 per pixel qq plot")

ggplot(AC_merge, aes(sample = log1p(aplp2ppixel))) +
  AC_qq_layers +
  labs(title = "APLP2 per pixel qq plot")
```

### Linear mixed model
```{r}
# Mixed models: log1p(gene per pixel) ~ tau status (fixed) + image (random
# intercept).
# NOTE(review): the set.seed() calls are kept so the RNG state matches the
# earlier version of this script; lmer fitting itself is presumably
# deterministic -- confirm before relying on the seeds.
set.seed(12)
calm1_mixed_model <- lmer(
  log1p(calm1ppixel) ~ Tau_Positive + (1|Image_ID),
  data = AC_merge
)

set.seed(1234)
aplp2_mixed_model <- lmer(
  log1p(aplp2ppixel) ~ Tau_Positive + (1|Image_ID),
  data = AC_merge
)

summary(calm1_mixed_model)

# Tau effect estimate and p-value from each model's coefficient table; these
# feed the plot subtitles below.
calm1_coef <- summary(calm1_mixed_model)$coefficients
calm1_estimate <- calm1_coef["Tau_Positive", "Estimate"]
calm1_p <- calm1_coef["Tau_Positive", "Pr(>|t|)"]

aplp2_coef <- summary(aplp2_mixed_model)$coefficients
aplp2_estimate <- aplp2_coef["Tau_Positive", "Estimate"]
aplp2_p <- aplp2_coef["Tau_Positive", "Pr(>|t|)"]

# Layers shared by both tau-status boxplots: outliers hidden, y axis zoomed to
# 0-3.5 (coord_cartesian zooms without discarding data), blue/red fill.
AC_tau_box_layers <- list(
  geom_boxplot(outlier.shape = NA),
  theme_bw(),
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 10, hjust = 0.5)
  ),
  scale_x_discrete(labels = c("0" = "Negative", "1" = "Positive")),
  coord_cartesian(ylim = c(0, 3.5)),
  scale_fill_manual(
    values = c("0" = "royalblue", "1" = "red3"),
    labels = c("0" = "Negative", "1" = "Positive")
  )
)

# CALM1: fold change relative to the image's tau-negative mean, by tau status.
CALM1_tau <- ggplot(
  AC_merge,
  aes(x = factor(Tau_Positive), y = CALM1_Relative_Fold_Change,
      fill = factor(Tau_Positive))
) +
  AC_tau_box_layers +
  labs(
    x = "",
    y = "CALM1 Fold Change",
    title = "CALM1",
    subtitle = sprintf("Mixed model: Effect=%.2f, p=%.3g", calm1_estimate, calm1_p),
    fill = "Tau"
  )
CALM1_tau
#ggsave("CALM1_tau.pdf")

summary(aplp2_mixed_model)

# APLP2: same layout as the CALM1 plot.
APLP2_tau <- ggplot(
  AC_merge,
  aes(x = factor(Tau_Positive), y = APLP2_Relative_Fold_Change,
      fill = factor(Tau_Positive))
) +
  AC_tau_box_layers +
  labs(
    x = "",
    y = "APLP2 Fold Change",
    title = "APLP2",
    subtitle = sprintf("Mixed model: Effect=%.2f, p=%.3g", aplp2_estimate, aplp2_p),
    fill = "Tau"
  )

APLP2_tau
```

### grouped by disease severity
```{r}
# Mixed models with a tau-status x disease-severity interaction and a random
# intercept per image.
# NOTE(review): set.seed() calls are retained to keep RNG state unchanged;
# they presumably do not influence the lmer fits -- confirm.
set.seed(54321)
calm1_mixed_model_DS <- lmer(
  log1p(calm1ppixel) ~ Tau_Positive * Disease_Severity + (1|Image_ID),
  data = AC_merge
)

set.seed(543)
aplp2_mixed_model_DS <- lmer(
  log1p(aplp2ppixel) ~ Tau_Positive * Disease_Severity + (1|Image_ID),
  data = AC_merge
)

summary(calm1_mixed_model_DS)

# Coefficient tables of the interaction models. These overwrite the
# calm1_coef / aplp2_coef objects created for the tau-only models.
calm1_coef <- summary(calm1_mixed_model_DS)$coefficients
aplp2_coef <- summary(aplp2_mixed_model_DS)$coefficients

# Layers shared by both faceted boxplots: one panel per severity level in a
# single row, outliers hidden, y axis zoomed to 0-3.5.
AC_ds_box_layers <- list(
  geom_boxplot(outlier.shape = NA),
  theme_bw(),
  facet_wrap(~Disease_Severity, nrow = 1),
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 10, hjust = 0.5),
    strip.text = element_text(size = 12)
  ),
  scale_x_discrete(labels = c("0" = "Negative", "1" = "Positive")),
  scale_fill_manual(
    values = c("0" = "royalblue", "1" = "red3"),
    labels = c("0" = "Negative", "1" = "Positive")
  ),
  coord_cartesian(ylim = c(0, 3.5))
)

# CALM1 fold change by tau status within each severity level.
CALM1_ds_tau <- ggplot(
  AC_merge,
  aes(x = factor(Tau_Positive), y = CALM1_Relative_Fold_Change,
      fill = factor(Tau_Positive))
) +
  AC_ds_box_layers +
  labs(x = "", y = "CALM1 Fold Change", title = "CALM1", fill = "Tau")

CALM1_ds_tau
#ggsave("CALM1_ds_tau.pdf")

summary(aplp2_mixed_model_DS)

# APLP2 fold change by tau status within each severity level.
APLP2_ds_tau <- ggplot(
  AC_merge,
  aes(x = factor(Tau_Positive), y = APLP2_Relative_Fold_Change,
      fill = factor(Tau_Positive))
) +
  AC_ds_box_layers +
  labs(x = "", y = "APLP2 Fold Change", title = "APLP2", fill = "Tau")

APLP2_ds_tau
#ggsave("APLP2_ds_tau.pdf")

# Overall log1p expression per severity level, regardless of tau status.
ggplot(
  AC_merge,
  aes(x = Disease_Severity, y = log1p(calm1ppixel), fill = Disease_Severity)
) +
  geom_boxplot() +
  theme_bw() +
  labs(title = "CALM1 expression by amyloid severity")

ggplot(
  AC_merge,
  aes(x = Disease_Severity, y = log1p(aplp2ppixel), fill = Disease_Severity)
) +
  geom_boxplot() +
  theme_bw() +
  labs(title = "APLP2 expression by amyloid severity")

  
```


## PRNP, SYT1
```{r}
# Case and image identifiers become plain factors; disease severity becomes an
# ordered factor (PART < AD-sparse < AD-moderate < AD-frequent).
PS_merge$Case_Number <- factor(PS_merge$Case_Number)
PS_merge$Image_ID <- factor(PS_merge$Image_ID)
PS_merge$Disease_Severity <- factor(
  PS_merge$Disease_Severity,
  ordered = TRUE,
  levels = c("PART", "AD-sparse", "AD-moderate", "AD-frequent")
)

# Per-image cell counts: tau-positive, tau-negative, and total.
# sum(Tau_Positive) is only a count if Tau_Positive is coded 0/1 -- TODO confirm.
PS_zero_tau_images <- PS_merge %>%
  group_by(Image_ID) %>%
  summarize(
    Tau_Pos_Count = sum(Tau_Positive),
    Tau_Neg_Count = sum(Tau_Positive == 0),
    Total_Cells = n()
  )

# Image IDs that have no tau-positive cell.
PS_zero_tau_list <- PS_zero_tau_images$Image_ID[which(PS_zero_tau_images$Tau_Pos_Count == 0)]

# Report what is being dropped. Previously the text "No images lack tau" was
# printed unconditionally, and because Image_ID is a factor, cat() printed its
# integer codes rather than the image names. The message now reflects the
# actual result and the IDs are converted to text first.
if (length(PS_zero_tau_list) == 0) {
  cat("No images lack tau\n")
} else {
  cat("Dropping images...", as.character(PS_zero_tau_list), "\n")
}

# Exclude images without tau-positive cells.
PS_merge <- PS_merge %>%
  filter(!Image_ID %in% PS_zero_tau_list)

# Images remaining after filtering, with a position index for each.
PS_image_ids <- unique(PS_merge$Image_ID)
PS_image_lookup <- data.frame(
  Image_ID = PS_image_ids,
  Index = seq_along(PS_image_ids)
)

# Preallocate: one fold-change value per ROI, one mean per image.
PRNP_relative_intensities <- numeric(nrow(PS_merge))
SYT1_relative_intensities <- numeric(nrow(PS_merge))
PRNP_tau_neg_means <- numeric(length(PS_image_ids))
SYT1_tau_neg_means <- numeric(length(PS_image_ids))
PRNP_tau_pos_means <- numeric(length(PS_image_ids))
SYT1_tau_pos_means <- numeric(length(PS_image_ids))

# Walk the images by index. Within each image the per-pixel counts are
# averaged separately over tau-negative and tau-positive ROIs, and every ROI's
# per-pixel count is divided by that image's tau-negative mean.
# An image with no tau-negative ROIs yields a NaN baseline (mean of an empty
# vector), so its fold changes are NaN as well.
for (idx in PS_image_lookup$Index) {
  id <- as.character(PS_image_lookup$Image_ID[idx])
  in_image <- PS_merge$Image_ID == id
  image_data <- PS_merge[in_image, ]

  is_tau_neg <- image_data$Tau_Positive == 0
  is_tau_pos <- image_data$Tau_Positive == 1

  PRNP_tau_neg_means[idx] <- mean(image_data$PRNPppixel[is_tau_neg])
  SYT1_tau_neg_means[idx] <- mean(image_data$SYT1ppixel[is_tau_neg])
  PRNP_tau_pos_means[idx] <- mean(image_data$PRNPppixel[is_tau_pos])
  SYT1_tau_pos_means[idx] <- mean(image_data$SYT1ppixel[is_tau_pos])

  PRNP_relative_intensities[in_image] <- image_data$PRNPppixel / PRNP_tau_neg_means[idx]
  SYT1_relative_intensities[in_image] <- image_data$SYT1ppixel / SYT1_tau_neg_means[idx]
}

# Attach the fold changes to the ROI table.
PS_merge$PRNP_Relative_Fold_Change <- PRNP_relative_intensities
PS_merge$SYT1_Relative_Fold_Change <- SYT1_relative_intensities

# Shapiro-Wilk p-value of the log1p-transformed per-pixel counts, computed
# separately within each disease-severity group.
summarise(
  group_by(PS_merge, Disease_Severity),
  PRNP_shapiro_p = shapiro.test(log1p(PRNPppixel))$p.value,
  SYT1_shapiro_p = shapiro.test(log1p(SYT1ppixel))$p.value
)

# Q-Q plots of the same transformed values, one panel per severity group.
# The layers shared by both plots are collected once and added as a list.
PS_qq_layers <- list(
  geom_qq(),
  geom_qq_line(),
  facet_wrap(~Disease_Severity)
)

ggplot(PS_merge, aes(sample = log1p(PRNPppixel))) +
  PS_qq_layers +
  labs(title = "PRNP per pixel qq plot")

ggplot(PS_merge, aes(sample = log1p(SYT1ppixel))) +
  PS_qq_layers +
  labs(title = "SYT1 per pixel change qq plot")
```

### Linear mixed model
```{r}
# Mixed models: log1p(gene per pixel) ~ tau status (fixed) + image (random
# intercept).
# NOTE(review): the set.seed() calls are kept so the RNG state matches the
# earlier version of this script; lmer fitting itself is presumably
# deterministic -- confirm before relying on the seeds.
set.seed(32)
prnp_mixed_model <- lmer(log1p(PRNPppixel) ~
                       Tau_Positive +           # Fixed effect of tau
                       (1|Image_ID),           # Random effect of image
                       data = PS_merge)

set.seed(31)
SYT1_mixed_model <- lmer(log1p(SYT1ppixel) ~
                       Tau_Positive +           # Fixed effect of tau
                       (1|Image_ID),           # Random effect of image
                       data = PS_merge)

# Tau effect estimate and p-value from each model's coefficient table; these
# feed the plot subtitles below.
prnp_coef <- summary(prnp_mixed_model)$coefficients
prnp_p <- prnp_coef["Tau_Positive", "Pr(>|t|)"]
prnp_estimate <- prnp_coef["Tau_Positive", "Estimate"]

SYT1_coef <- summary(SYT1_mixed_model)$coefficients
SYT1_p <- SYT1_coef["Tau_Positive", "Pr(>|t|)"]
SYT1_estimate <- SYT1_coef["Tau_Positive", "Estimate"]

summary(prnp_mixed_model)

# PRNP: fold change relative to the image's tau-negative mean, by tau status.
# Outliers are hidden and the y axis is zoomed to 0-3.5 (coord_cartesian zooms
# without discarding data).
PRNP_tau <-
  ggplot(PS_merge, aes(x = factor(Tau_Positive), y = PRNP_Relative_Fold_Change,
                       fill = factor(Tau_Positive))) +
  geom_boxplot(outlier.shape = NA) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 10, hjust = 0.5)
  ) +
  scale_x_discrete(labels = c("0" = "Negative", "1" = "Positive")) +
  scale_fill_manual(values = c("0" = "royalblue", "1" = "red3"),
                    labels = c("0" = "Negative", "1" = "Positive")) +
  coord_cartesian(ylim = c(0, 3.5)) +
  labs(x = "",
       y = "PRNP Fold Change",
       fill = "Tau",
       title = "PRNP",
       subtitle = sprintf("Mixed model: Effect=%.2f, p=%.3g", prnp_estimate, prnp_p))

PRNP_tau
# Pass the plot explicitly: without `plot =`, ggsave() writes whatever
# last_plot() happens to be, which silently breaks if statements are reordered
# or another plot is created in between.
ggsave("PRNP_tau.pdf", plot = PRNP_tau)

summary(SYT1_mixed_model)

# SYT1: same layout as the PRNP plot.
# NOTE(review): this is the only tau-status plot with x = "Tau"; the others
# use an empty x label -- confirm whether that difference is intentional.
SYT1_tau <-
  ggplot(PS_merge, aes(x = factor(Tau_Positive), y = SYT1_Relative_Fold_Change,
                       fill = factor(Tau_Positive))) +
  geom_boxplot(outlier.shape = NA) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 10, hjust = 0.5)
  ) +
  scale_x_discrete(labels = c("0" = "Negative", "1" = "Positive")) +
  scale_fill_manual(values = c("0" = "royalblue", "1" = "red3"),
                    labels = c("0" = "Negative", "1" = "Positive")) +
  coord_cartesian(ylim = c(0, 3.5)) +
  labs(x = "Tau",
       y = "SYT1 Fold Change",
       fill = "Tau",
       title = "SYT1",
       subtitle = sprintf("Mixed model: Effect=%.2f, p=%.3g", SYT1_estimate, SYT1_p))

SYT1_tau
ggsave("SYT1_tau.pdf", plot = SYT1_tau)
```

### grouped by disease severity
```{r}
# Mixed models with a tau-status x disease-severity interaction and a random
# intercept per image.
# NOTE(review): set.seed() calls are retained to keep RNG state unchanged;
# they presumably do not influence the lmer fits -- confirm.
set.seed(11)
PRNP_mixed_model_DS <- lmer(
  log1p(PRNPppixel) ~ Tau_Positive * Disease_Severity + (1|Image_ID),
  data = PS_merge
)
set.seed(22)
SYT1_mixed_model_DS <- lmer(
  log1p(SYT1ppixel) ~ Tau_Positive * Disease_Severity + (1|Image_ID),
  data = PS_merge
)

summary(PRNP_mixed_model_DS)

# Layers shared by both faceted boxplots: one panel per severity level in a
# single row, outliers hidden, y axis zoomed to 0-3.5.
PS_ds_box_layers <- list(
  geom_boxplot(outlier.shape = NA),
  facet_wrap(~Disease_Severity, nrow = 1),
  theme_bw(),
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 10, hjust = 0.5),
    strip.text = element_text(size = 12)
  ),
  scale_x_discrete(labels = c("0" = "Negative", "1" = "Positive")),
  scale_fill_manual(
    values = c("0" = "royalblue", "1" = "red3"),
    labels = c("0" = "Negative", "1" = "Positive")
  ),
  coord_cartesian(ylim = c(0, 3.5))
)

# PRNP fold change by tau status within each severity level.
PRNP_ds_tau <- ggplot(
  PS_merge,
  aes(x = factor(Tau_Positive), y = PRNP_Relative_Fold_Change,
      fill = factor(Tau_Positive))
) +
  PS_ds_box_layers +
  labs(x = "", y = "PRNP Fold Change", fill = "Tau", title = "PRNP")
PRNP_ds_tau
#ggsave("PRNP_ds_tau.pdf")

summary(SYT1_mixed_model_DS)

# SYT1 fold change by tau status within each severity level.
SYT1_ds_tau <- ggplot(
  PS_merge,
  aes(x = factor(Tau_Positive), y = SYT1_Relative_Fold_Change,
      fill = factor(Tau_Positive))
) +
  PS_ds_box_layers +
  labs(x = "", y = "SYT1 Fold Change", fill = "Tau", title = "SYT1")
SYT1_ds_tau
#ggsave("SYT1_ds_tau.pdf")

# Overall log1p expression per severity level, regardless of tau status.
ggplot(
  PS_merge,
  aes(x = Disease_Severity, y = log1p(PRNPppixel), fill = Disease_Severity)
) +
  geom_boxplot() +
  theme_bw() +
  labs(title = "PRNP expression by amyloid severity")

ggplot(
  PS_merge,
  aes(x = Disease_Severity, y = log1p(SYT1ppixel), fill = Disease_Severity)
) +
  geom_boxplot() +
  theme_bw() +
  labs(title = "SYT1 expression by amyloid severity")


```

```{r}
#' Extract the fixed-effect coefficient table of one fitted model
#'
#' Reads `summary(model)$coefficients` and reshapes it into a tidy data frame
#' with one row per model term.
#'
#' @param model A fitted model whose `summary()` has a `coefficients` matrix
#'   with the columns "Estimate", "Std. Error", "df", "t value" and
#'   "Pr(>|t|)" (the layout this file obtains from `lmer()` with lmerTest
#'   loaded).
#' @param gene_name Label stored in the `Gene` column of every row.
#' @return A data frame with columns Gene, Term, Estimate, Std_Error, DF,
#'   t_value and p_value; row names are the term names.
extract_lmer_stats <- function(model, gene_name) {
    # Summarise once; the summary is reused for every column below.
    coef_matrix <- summary(model)$coefficients

    # Fail with a readable message instead of "subscript out of bounds" when
    # the df / p-value columns are absent.
    required_cols <- c("Estimate", "Std. Error", "df", "t value", "Pr(>|t|)")
    missing_cols <- setdiff(required_cols, colnames(coef_matrix))
    if (length(missing_cols) > 0) {
        stop(
            "Coefficient table is missing column(s): ",
            paste(missing_cols, collapse = ", "),
            ". Fit the model with lmerTest loaded so df and p-values are available."
        )
    }

    # Take term names from the row names of the table. Extracting a single
    # column from a ONE-row matrix drops its names, which previously made the
    # Term column vanish for models with a single coefficient.
    term_names <- rownames(coef_matrix)
    coef_table <- as.data.frame(coef_matrix)

    data.frame(
        Gene = gene_name,
        Term = term_names,
        Estimate = coef_table[, "Estimate"],
        Std_Error = coef_table[, "Std. Error"],
        DF = coef_table[, "df"],
        t_value = coef_table[, "t value"],
        p_value = coef_table[, "Pr(>|t|)"],
        stringsAsFactors = FALSE,
        row.names = term_names
    )
}

#' Combine the coefficient tables of several fitted models
#'
#' Calls `extract_lmer_stats()` on each model paired with the gene name at the
#' same position, stacks the resulting tables, and sorts the rows by term and
#' then by p-value.
#'
#' @param model_list List of fitted models.
#' @param gene_names Gene labels, in the SAME order as `model_list`.
#' @return One data frame holding the rows of every model, ordered by `Term`
#'   and `p_value`.
aggregate_lmer_results <- function(model_list, gene_names) {
    # Models and labels are paired positionally, so their counts must agree.
    if(length(model_list) != length(gene_names)) {
        stop("Number of models must match number of gene names")
    }

    # One statistics table per (model, gene) pair.
    per_model_stats <- Map(
        extract_lmer_stats,
        model = model_list,
        gene_name = gene_names
    )

    # Stack the tables, then order rows by term and, within a term, by p-value.
    stacked <- do.call(rbind, per_model_stats)
    row_order <- order(stacked$Term, stacked$p_value)
    stacked[row_order,]
}
```

## aggregate results
```{r}
# Gene labels. Both model lists below follow this exact order, because
# aggregate_lmer_results() pairs models with labels by position.
genes <- c("APLP2", "CALM1", "SYT1", "PRNP")

# Tau-positive vs tau-negative models (tau status as the only fixed effect).
taupvn_models <- list(
  aplp2_mixed_model,
  calm1_mixed_model,
  SYT1_mixed_model,
  prnp_mixed_model
)
taupvn_agg <- aggregate_lmer_results(taupvn_models, genes)

# Tau status x disease severity interaction models.
DS_taupvn <- list(
  aplp2_mixed_model_DS,
  calm1_mixed_model_DS,
  SYT1_mixed_model_DS,
  PRNP_mixed_model_DS
)
DS_taupvn_agg <- aggregate_lmer_results(DS_taupvn, genes)


```