Skip to content

Latest commit

 

History

History
133 lines (101 loc) · 3.85 KB

13e_GenePrediction_GeneStatistics.md

File metadata and controls

133 lines (101 loc) · 3.85 KB

Compare output with existing annotations


### Mikado.loci.gff3 round2 clustered by cufflinks

```
#gene length
less mikado.loci.ancestral.gff3 |awk '$3=="locus"' |grep -v "#"|awk '{if($4>$5){print $4-$5} else {print $5-$4}}' |summary.sh
Total: 102,643,360 Count: 22,465 Mean: 4,569 Median: 2,065 Min: 220 Max: 334,901
```

```
#transcript length
less mikado.loci.ancestral.gff3|awk '$3=="mRNA"' |grep -v "#"|awk '{if($4>$5){print $4-$5} else {print $5-$4}}' |summary.sh
Total: 103,726,162 Count: 23,933 Mean: 4,334 Median: 1,957 Min: 220 Max: 334,904
```

```
#exons per transcript (counted from CDS features)
less mikado.loci.ancestral.gff3|awk '$3=="CDS" ' |cut -f 9 |sed 's/./\t/3' |awk '{print $1}' |sort|uniq -c |awk '{print $1}' |summary.sh
Total: 179,476 Count: 23,933 Mean: 7 Median: 5 Min: 1 Max: 194
```

```
#Exon length
less mikado.loci.ancestral.gff3|awk '$3=="exon"' |grep -v "#"|awk '{if($4>$5){print $4-$5} else {print $5-$4}}' |summary.sh
Total: 28,809,023 Count: 182,837 Mean: 157 Median: 128 Min: 1 Max: 5,764
```


### gene stats of braker masked annotation

```
less augustus.hints.gff |awk '$3=="gene"' |grep -v "#"|awk '{if($4>$5){print $4-$5} else {print $5-$4}}' |summary.sh
Total: 67,011,405 Count: 22,408 Mean: 2,990 Median: 2,015 Min: 200 Max: 52,212
```


### gene stats of braker unmasked annotation

```
less augustus.hints.gff |awk '$3=="gene"' |grep -v "#"|awk '{if($4>$5){print $4-$5} else {print $5-$4}}' |summary.sh
Total: 83,144,932 Count: 35,514 Mean: 2,341 Median: 1,546 Min: 200 Max: 43,263
```


### gene stats of maker annotation on old dovetail genome

/work/GIF/remkv6/Baum/01_SCNDovetailScaffolding

```
less 12_MakerGenesOrthofinder/DovetailSCNMaker4.all.NOFASTA.gff|awk '$3=="gene"' |grep -v "#"|awk '{if($4>$5){print $4-$5} else {print $5-$4}}' |summary.sh
Total: 67,168,685 Count: 22,856 Mean: 2,938 Median: 1,811 Min: 5 Max: 65,204
```




### gene calls from 738 genome

```
less ../CamTechGenomeComparison/58_Renamatorium/1_genomeNgff/fixed.augustus.gff3|awk '$3=="gene"' |grep -v "#"|awk '{if($4>$5){print $4-$5} else {print $5-$4}}' |summary.sh
Total: 71,832,760 Count: 29,769 Mean: 2,413 Median: 1,603 Min: 79 Max: 65,717
```


### Functional annotation stats -- needs updating (7/30/20)

```
#interproscan
less 01_Interpro/interproAnnot.tsv |awk '{print $1}' |sort|uniq|wc
25779 25779 639842
```

```
#proteins to uniprot
less 04_ProtsUniprot/mikado_proteins.vs.uniprot_sprot.cul5.1e5.blastp.out |grep -v "hypothetical" |grep -v "uncharacterized" |awk '{print $1 }' |sort|uniq|wc
1604 1604 39727
```

```
#transcripts to uniprot
less 05_TransUniprot/mikado_transcripts.vs.uniprot_sprot.cul5.1e5.blastx.out |grep -v "hypothetical" |grep -v "uncharacterized" |awk '{print $1 }' |sort|uniq|wc
12796 12796 317987
```

```
#prots to nr
less 02_Prots2Nr/mikado_proteinsFixed.vs.nr.cul5.1e5.blastp.out |grep -v "hypothetical" |grep -v "uncharacterized" |awk '{print $1}' |sort|uniq|wc
3056 3056 75814
```

```
#transcripts to nt
less 03_Transcrips2Nt/mikado_transcripts.vs.nt.cul5.1e5.blastn.out |grep -v "hypothetical" |grep -v "uncharacterized" |awk '{print $1}' |sort|uniq|wc
2266 2266 56298
```

```
#All databases together
cat <(less 01_Interpro/interproAnnot.tsv |awk '{print $1}' |sort|uniq) \
    <(less 02_Prots2Nr/mikado_proteinsFixed.vs.nr.cul5.1e5.blastp.out |grep -v "hypothetical" |grep -v "uncharacterized" |awk '{print $1}' |sort|uniq) \
    <( less 03_Transcrips2Nt/mikado_transcripts.vs.nt.cul5.1e5.blastn.out |grep -v "hypothetical" |grep -v "uncharacterized" |awk '{print $1}' |sort|uniq) \
    <(less 04_ProtsUniprot/mikado_proteins.vs.uniprot_sprot.cul5.1e5.blastp.out |grep -v "hypothetical" |grep -v "uncharacterized" |awk '{print $1 }' |sort|uniq) \
    <(less 05_TransUniprot/mikado_transcripts.vs.uniprot_sprot.cul5.1e5.blastx.out |grep -v "hypothetical" |grep -v "uncharacterized" |awk '{print $1 }' |sort|uniq) \
    |sort|uniq|wc
26951 26951 668844
```