library(ggplot2)
library(ggpointdensity)
library(viridis)
library("ChIPseeker")
library("GenomicFeatures")
# Annotate the merged ATAC-seq peaks against the poppy gene models, keep the
# peaks assigned to BIA genes, and export the TSS-proximal ones as a BED file.
poppy_TxDb <- makeTxDbFromGFF("D:/Papaver_somniferum.gene.gff3")
X <- annotatePeak(
  "D:/ATAC_peak/BIA/four_tissue_sort.bed",
  tssRegion = c(-3000, 3000),
  TxDb = poppy_TxDb
)
perk_related_to_gene <- as.data.frame(as.GRanges(X))

# BIA gene table: the first column is dropped, after which column 2 supplies
# the gene ids matched against the annotation's geneId.
bia_gene <- read.csv("D:/ATAC_peak/BIA/BIA_gene.csv", stringsAsFactors = FALSE)
bia_gene <- bia_gene[, -1]
is_bia_peak <- perk_related_to_gene$geneId %in% bia_gene[, 2]
perk_related_to_gene <- perk_related_to_gene[is_bia_peak, ]

# Column 21 is used as the peak-to-gene distance (presumably distanceToTSS
# from annotatePeak -- TODO confirm with colnames(perk_related_to_gene)).
tss_distance <- abs(perk_related_to_gene[, 21])

# Peaks within 3 kb and within 5 kb, with per-gene peak counts for inspection.
perk_related_to_gene_1 <- perk_related_to_gene[tss_distance <= 3000, ]
a <- as.data.frame(table(perk_related_to_gene_1$geneId))
perk_related_to_gene_2 <- perk_related_to_gene[tss_distance <= 5000, ]
a_1 <- as.data.frame(table(perk_related_to_gene_2$geneId))

# Export the 3 kb set. Column 2 (start) is shifted down by one, presumably to
# turn 1-based GRanges starts into 0-based BED starts -- TODO confirm.
# The remaining positional indices depend on the layout of the input BED.
bed_columns <- c(1, 2, 3, 6, 19, 21, 14, 15, 16)
perk_related_to_gene_1 <- perk_related_to_gene_1[, bed_columns]
perk_related_to_gene_1[, 2] <- perk_related_to_gene_1[, 2] - 1
write.table(
  perk_related_to_gene_1,
  "D:/ATAC_peak/BIA/homer/BIA_3kb.bed",
  sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE
)
# Shell step: run HOMER known-motif enrichment on the exported peaks.
# Directory containing BIA_3kb.bed (read by awk below).
cd /mnt/d/ATAC_peak/BIA/homer
# Reorder the BED fields to "field4, field1, field2, field3" and append a
# literal "+" as the fifth field (presumably HOMER's peak-file layout:
# id, chr, start, end, strand -- TODO confirm).
awk '{print $4"\t"$1"\t"$2"\t"$3"\t+"}' BIA_3kb.bed >homer_peaks.tmp
conda activate homer
# Arguments: peak file, reference genome FASTA, output directory; -p 4 is
# passed as the processor count.
findMotifsGenome.pl ./homer_peaks.tmp /Papaver_somniferum.ref.fa BIA_3kb_motifDir -p 4
## Because many motif names look like "athb6", we must find out which opium poppy gene corresponds to "athb6".
# Start this stage from an empty workspace (removes every object created so far).
rm(list = ls())
## get id from homer results
library(tidyverse)

# Read HOMER's known-motif table; the header line is skipped and the nine
# column names are assigned by hand.
homer_result <- read.table(
  "D:/ATAC_peak/BIA/homer/BIA_3kb_motifDir/knownResults.txt",
  sep = "\t", skip = 1
)
colnames(homer_result) <- c(
  "Motif Name", "Consensus", "P-value", "Log P-value", "q-value (Benjamini)",
  "of Target Sequences with Motif", "% of Target Sequences with Motif",
  "of Background Sequences with Motif", "% of Background Sequences with Motif"
)

# "Motif Name" is "/"-delimited: keep the first two pieces, then retain only
# the motifs whose P-value is at most 0.05.
homer_result1 <- separate(
  homer_result,
  col = `Motif Name`, into = c("TF_name", "data_set"), sep = "/"
)
is_significant <- homer_result1[["P-value"]] <= 0.05
homer_result1 <- homer_result1[is_significant, ]

# Strip everything from the first "(" onwards to leave the bare TF name.
homer_result1 <- separate(homer_result1, col = TF_name, into = "TF", sep = "\\(")
homer_result1 <- as.data.frame(homer_result1[, 1])

# Write all TF names on a single space-separated line.
write.table(
  t(homer_result1),
  "D:/ATAC_peak/BIA/homer_tf/known_ATAC_tf_only_name_knownResults.txt",
  sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE
)
cd /mnt/d/ATAC_peak/BIA/homer_tf
## "known_uniprot-ATAC_tf_only_name_knownResults.fasta" is a protein FASTA file.
## It was obtained by using the names in "known_ATAC_tf_only_name_knownResults.txt" as the query to look up protein sequences in the UniProt database, with Arabidopsis as the reference species.
## Align those proteins against the BLAST database named "blastdp" (presumably built from the poppy proteome -- TODO confirm) and write tabular (-outfmt 6) hits to known_blast_result.txt.
blastp -db blastdp -query ./known_uniprot-ATAC_tf_only_name_knownResults.fasta -evalue 1e-5 -num_threads 4 -outfmt 6 -best_hit_score_edge 0.05 -best_hit_overhang 0.25 -qcov_hsp_perc 75.0 -out known_blast_result.txt
## The following steps are the same as in "003Joint analysis of RNA-seq and ATAC-seq".
library(tidyverse)
library(readxl)
library(XML)
library(bitops)
library(RCurl)
# Load the tabular blastp hits and keep those whose third column exceeds 40
# (presumably percent identity in -outfmt 6 -- TODO confirm).
tissue_homer <- read.table(
  "D:/ATAC_peak/BIA/homer_tf/known_blast_result.txt",
  sep = "\t", stringsAsFactors = FALSE
)
tissue_homer <- tissue_homer[tissue_homer[["V3"]] > 40, c("V1", "V2")]

# The query id (V1) is "|"-delimited; split it into three pieces and keep the
# middle piece next to the subject id (V2).
tissue_homer <- separate(
  tissue_homer,
  col = V1, into = c("1", "2", "3"), sep = "\\|"
)
tissue_homer <- tissue_homer[, c("V2", "2")]
colnames(tissue_homer) <- c("poppy_name", "uniprot_name")

# UniProt annotation table: the first row carries the column names.
tf_information <- read.table(
  "D:/ATAC_peak/BIA/homer_tf/uniprot_anno.txt",
  sep = "\t", stringsAsFactors = FALSE
)
colnames(tf_information) <- tf_information[1, ]
tf_information <- tf_information[-1, ]

# Attach the annotation to every hit through the UniProt accession.
tf_merge <- merge(
  tissue_homer, tf_information,
  by.x = "uniprot_name", by.y = "Entry"
)
# Parse HOMER's knownResults.html and extract, for every motif row of the
# results table, its first column (stored as "rnak") and the bare TF name.
URL <- paste0("file:///D:/ATAC_peak/BIA/homer/BIA_3kb_motifDir/knownResults.html")
temp <- getURL(URL)
web_tissue <- htmlParse(temp)
root <- xmlRoot(web_tissue)
results_table <- root[["body"]][["table"]]
length_motif <- as.data.frame(xmlSApply(results_table, xmlValue))

# Convert every table child except the first (presumably the header row) into
# a one-row data frame. seq_len()[-1] stays empty when there are fewer than two
# children, whereas 2:n would count backwards; binding once at the end avoids
# re-copying the growing data frame on every iteration.
data_rows <- seq_len(length(t(length_motif)))[-1]
if (length(data_rows) == 0) {
  stop("knownResults.html contains no motif rows", call. = FALSE)
}
tissue_motif_matrix <- do.call(
  rbind,
  lapply(data_rows, function(i) xmlToDataFrame(results_table[i]))
)
tissue_motif_matrix <- data.frame(
  lapply(tissue_motif_matrix, as.character),
  stringsAsFactors = FALSE
)

# Keep columns 1 and 3; column 3 is treated as the motif name.
tissue_motif_matrix <- tissue_motif_matrix[, c(1, 3)]
colnames(tissue_motif_matrix) <- c("rnak", "Motif Name")

# Reduce "Name(family)/dataset/..." to the part before the first "/" and then
# to the part before the first "(".
tissue_motif_matrix_true1 <- separate(
  data = tissue_motif_matrix,
  col = `Motif Name`, into = "TF", sep = "/"
)
tissue_motif_matrix_true1 <- separate(
  data = tissue_motif_matrix_true1,
  col = TF, into = "TF_name", sep = "\\("
)
# Join the poppy/UniProt TF table with the HOMER motif table on the TF name.
# "homer_name" is not created in the visible code, so it is presumably a
# column of uniprot_anno.txt -- verify.
tf_poppy <- merge(
  tf_merge, tissue_motif_matrix_true1,
  by.x = "homer_name", by.y = "TF_name"
)

# Column 6 becomes the path of the motif file, built from column 5
# (presumably the "rnak" value -- TODO confirm the column position).
tf_poppy[, 6] <- paste0(
  "/mnt/d/ATAC_peak/BIA/homer/BIA_3kb_motifDir/knownResults/",
  "known", tf_poppy[, 5], ".motif"
)

# Unique motif paths followed by a trailing "test" entry.
path <- data.frame(
  V1 = c(as.character(unique(tf_poppy[, 6])), "test"),
  stringsAsFactors = FALSE
)
len <- nrow(path)

write.csv(
  tf_poppy,
  "D:/ATAC_peak/BIA/homer_tf/regulatory_network/homer_tf_all_known.csv",
  row.names = FALSE, quote = FALSE
)
# All paths go on one space-separated line.
write.table(
  t(path),
  "D:/ATAC_peak/BIA/homer_tf/regulatory_network/path_to_motif/homer_tf_path_known.txt",
  sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE
)
# Shell step: merge the selected motif files and scan the peaks with them.
conda activate homer
cd /mnt/d/ATAC_peak/BIA/homer_tf/regulatory_network/path_to_motif/
# For every file in this directory, read it line by line and append the
# contents of the files named on each line to ../<file name>.motifs.
# $id2 is unquoted, so a line holding several space-separated paths is passed
# to cat as several arguments.
ls | while read id;do(cat $id |while read id2;do(cat $id2 >> /mnt/d/ATAC_peak/BIA/homer_tf/regulatory_network/$id.motifs);done);done
cd ..
# Annotate the peaks with the combined motif file. This expects BIA_3kb.bed
# and Papaver_somniferum.ref.fa in the current directory (presumably copied
# here beforehand -- TODO confirm).
annotatePeaks.pl ./BIA_3kb.bed Papaver_somniferum.ref.fa -m homer_tf_path_known.txt.motifs > homer_tf_path_known.txt.motifs.txt
# Reshape the annotatePeaks.pl output so that rows are peaks, columns are
# motifs, and one extra final row carries the short TF name of each column.
# Global option: do not convert strings to factors.
options(stringsAsFactors = FALSE)
motif_binding <- read.csv("D:/ATAC_peak/BIA/homer_tf/regulatory_network/homer_tf_path_known.txt.motifs.txt",
sep="\t",stringsAsFactors = F)
# Frequency table of the first column, kept for inspection only.
a <- as.data.frame(table(motif_binding[,1]))
# Keep column 1 and columns 22..last (presumably the peak id and the per-motif
# columns -- TODO confirm), then transpose so each kept column becomes a row.
motif_binding <- as.data.frame(t(motif_binding[c(1,22:length(colnames(motif_binding)))]))
# Append a column holding the row names (the original column names) and keep
# only the text before the first "." as TF_name.
add_col_num <- dim(motif_binding)[2]+1
motif_binding[,add_col_num] <- rownames(motif_binding)
motif_binding <- separate(data = motif_binding, col = paste0("V",add_col_num), into = "TF_name", sep = "\\.")
# Frequency table of the TF names, kept for inspection (overwrites `a`).
a <- as.data.frame(table(motif_binding[,add_col_num]))
# Rows 7 and 8 are hard-coded: their TF names get the suffixes "_DAP" and
# "_CHIP" (presumably two motifs that share one TF name -- verify the row
# positions whenever the motif list changes). The bare rownames() calls only
# display the row name for manual checking.
rownames(motif_binding)[7]
motif_binding[7,add_col_num] <- paste0(motif_binding[7,add_col_num],"_","DAP")
rownames(motif_binding)[8]
motif_binding[8,add_col_num] <- paste0(motif_binding[8,add_col_num],"_","CHIP")
# Transpose back; the TF_name values now form the last row.
motif_binding <- as.data.frame(t(motif_binding))
# Build the peak -> TF edge list: one row (peak, tf) for every non-empty cell
# of motif_binding. Column 1 holds the peak ids, columns 2..ncol hold one motif
# each, and the last row holds the TF name of each motif column.
#
# Each motif column is filtered in one vectorised step and the pieces are
# bound once, instead of calling rbind() for every single cell. Cells that are
# NA (a motif column with no hit at all is read as logical NA) count as
# "no binding site" instead of aborting the comparison.
tf_name_row <- nrow(motif_binding)
peak_rows <- seq_len(tf_name_row - 1)
peak_ids <- motif_binding[peak_rows, 1]
tf_columns <- seq_len(ncol(motif_binding))[-1]

hits_per_tf <- lapply(tf_columns, function(tf) {
  cells <- motif_binding[peak_rows, tf]
  has_site <- !is.na(cells) & cells != ""
  data.frame(
    peak = peak_ids[has_site],
    tf = rep(as.character(motif_binding[tf_name_row, tf]), sum(has_site)),
    stringsAsFactors = FALSE
  )
})
result_data <- do.call(rbind, hits_per_tf)

# No motif columns at all: fall back to an empty, correctly named table.
if (is.null(result_data)) {
  result_data <- data.frame(
    peak = character(0), tf = character(0),
    stringsAsFactors = FALSE
  )
}
# Attach the target gene of every peak (columns 4 and 5 of the BED file) to
# the peak -> TF edge list and save the network.
peak <- read.csv(
  "D:/ATAC_peak/BIA/homer_tf/regulatory_network/BIA_3kb.bed",
  sep = "\t", header = FALSE
)
peak <- peak[, c("V4", "V5")]
result_data <- merge(result_data, peak, by.x = "peak", by.y = "V4")
colnames(result_data) <- c("peak", "tf", "target_gene")
write.csv(
  result_data,
  "D:/ATAC_peak/BIA/homer_tf/regulatory_network/bia_tf_gene_network.csv"
)

# Number of distinct target genes per TF.
result_data_table <- unique(result_data[, c("tf", "target_gene")])
result_data_table <- as.data.frame(table(result_data_table[, 1]))

# Add the poppy TF information (first three columns of the saved TF table).
# Rows 16-21 are relabelled by position so their names match the "_DAP" and
# "_CHIP" suffixed TF names in result_data; the row numbers are hard-coded,
# so verify them if the TF table changes.
poppy_tf <- read.csv(
  "D:/ATAC_peak/BIA/homer_tf/regulatory_network/homer_tf_all_known.csv"
)
poppy_tf <- poppy_tf[, 1:3]
poppy_tf[c(16, 18, 20), 1] <- "ATHB6_DAP"
poppy_tf[c(17, 19, 21), 1] <- "ATHB6_CHIP"
result_data <- merge(result_data, poppy_tf, by.x = "tf", by.y = "homer_name")
write.csv(
  result_data,
  "D:/ATAC_peak/BIA/homer_tf/regulatory_network/bia_tf_gene_network_all_infor.csv"
)
# Shell step: extract the sequences of the intervals in hb6_getfasta_2.bed
# from the reference genome into hb6_common_motif_2.fasta. The -s and -name
# flags are passed (strand-aware extraction and BED-name headers per the
# bedtools documentation -- confirm for the installed version).
bedtools getfasta -fi /mnt/d/001poppy_atac_new_genome/Papaver_somniferum.ref.fa -bed ./hb6_getfasta_2.bed -s -name > hb6_common_motif_2.fasta
library(ggplot2)
library(ggmsa)
library(seqmagick)
# Draw the extracted sequences as three PDFs covering alignment columns
# 1-10, 11-20 and 21-30.
seqence <- seqmagick::fa_read("D:/001poppy_atac_new_genome/ATAC_peak/BIA/homer/hb6_common_motif_2.fasta")

# Custom palette that paints every nucleotide white; used for columns 1-10
# and 21-30, while columns 11-20 use the "Chemistry_NT" colour scheme.
color_data <- data.frame(
  names = c("A", "T", "G", "C"),
  color = rep("white", 4),
  stringsAsFactors = FALSE
)
my_cutstom <- color_data

# ggplot objects are only auto-printed at top level. print() is called
# explicitly so the PDFs are not left empty when this script is run through
# source() or from inside a function.
pdf("motif_seqence_flanking_1_10.pdf")
print(ggmsa(seqence, 1, 10, custom_color = my_cutstom, seq_name = TRUE))
##+geom_seqlogo(color = "Taylor_NT") + geom_msaBar()
dev.off()

pdf("motif_seqence_flanking_11_20.pdf")
print(ggmsa(seqence, 11, 20, color = "Chemistry_NT", seq_name = TRUE))
##+geom_seqlogo(color = "Taylor_NT") + geom_msaBar()
dev.off()

pdf("motif_seqence_flanking_21_30.pdf")
print(ggmsa(seqence, 21, 30, custom_color = my_cutstom, seq_name = TRUE))
##+geom_seqlogo(color = "Taylor_NT") + geom_msaBar()
dev.off()