diff --git a/data/mibig.csv b/data/mibig.csv new file mode 100644 index 0000000..4f3da13 --- /dev/null +++ b/data/mibig.csv @@ -0,0 +1,608 @@ +BGC,ORF,A-ID,M domain,L-/D- (E domain),PRED_TOP5,START_POS,END_POS,STRAND,STRUCTURE ID,rBan STRUCTURE,rBan VERTEX,rBan AA-ID,rBan STRUCT_CONFIGURATION,rBan AA,STRUCTURE,VERTEX,AA-ID,MODIFICATION,AA +BGC0000300,-,-,-,-,-,-,-,-,BGC0000300,"0,1,2,3,4,5,6",0,Sal,NA,sal,,,,, +BGC0000300,orf00005,A1,FALSE,L,cys(90.0);orn(60.0);asn(50.0);asp(50.0);gln(50.0),5185.0,8539.0,-,BGC0000300,"0,1,2,3,4,5,6",1,@D-Ser,D,ser,"0,1,2,3,4,5",0,Ser,, +BGC0000300,orf00006,A1,FALSE,L,ser(95.0);thr(65.0);allothr(65.0);hpg(65.0);dab(60.0),8532.0,15621.0,-,BGC0000300,"0,1,2,3,4,5,6",2,@L-Ser,L,ser,"0,1,2,3,4,5",1,Ser,, +BGC0000300,orf00006,A2,FALSE,D,ser(95.0);thr(65.0);allothr(65.0);hpg(65.0);dab(60.0),8532.0,15621.0,-,BGC0000300,"0,1,2,3,4,5,6",3,@D-Ser,D,ser,"0,1,2,3,4,5",2,Ser,, +BGC0000300,orf00007,A1,FALSE,D,bmt(60.0);dht(50.0);cit(50.0);end(50.0);uda(50.0),15613.0,19819.0,-,BGC0000300,"0,1,2,3,4,5,6",6,OH-cOrn,NA,orn,"0,1,2,3,4,5",5,hOrn,, +BGC0000300,orf00008,A1,FALSE,L,ser(95.0);thr(65.0);allothr(65.0);hpg(65.0);dab(60.0),19803.0,26805.0,-,BGC0000300,"0,1,2,3,4,5,6",4,@L-Ser,L,ser,"0,1,2,3,4,5",3,Ser,, +BGC0000300,orf00008,A2,FALSE,D,orn(100.0);arg(60.0);lys(60.0);asp(50.0);met(50.0),19803.0,26805.0,-,BGC0000300,"0,1,2,3,4,5,6",5,@D-Fo-OH-Orn,D,orn,"0,1,2,3,4,5",4,hfOrn,, +BGC0000300,orf00012,A1,FALSE,L,dhb(85.0);sal(80.0);phe(40.0);tyr(40.0);phg(40.0),29168.0,30716.0,+,BGC0000300,,,,,,,,,, +BGC0000305,orf00003,A1,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),1665.0,8055.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",0,@D-Leu,D,leu,"10,9,8,7,6,5,4,3,2*1,0",0,Leu,,D-Leu +BGC0000305,orf00003,A2,FALSE,D,asp(100.0);asn(75.0);gln(75.0);glu(75.0);arg(55.0),1665.0,8055.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",1,@D-Asp,D,asp,"10,9,8,7,6,5,4,3,2*1,0",1,Asp,,D-Asp 
+BGC0000305,orf00004,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),8236.0,21253.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",2,@D-aThr/Thr,D,thr,"10,9,8,7,6,5,4,3,2*1,0",2,Thr,,D-allo-Thr +BGC0000305,orf00004,A2,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),8236.0,21253.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",3,@D-Leu,D,leu,"10,9,8,7,6,5,4,3,2*1,0",3,Leu,,D-Leu +BGC0000305,orf00004,A3,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),8236.0,21253.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",4,@D-Leu,D,leu,"10,9,8,7,6,5,4,3,2*1,0",4,Leu,,D-Leu +BGC0000305,orf00004,A4,FALSE,D,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),8236.0,21253.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",5,@D-Ser,D,ser,"10,9,8,7,6,5,4,3,2*1,0",5,Ser,,D-Ser +BGC0000305,orf00005,A1,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),21249.0,39093.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",6,Leu,NA,leu,"10,9,8,7,6,5,4,3,2*1,0",6,Leu,,L-Leu +BGC0000305,orf00005,A2,FALSE,D,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),21249.0,39093.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",7,@D-Ser,D,ser,"10,9,8,7,6,5,4,3,2*1,0",7,Ser,,D-Ser +BGC0000305,orf00005,A3,FALSE,L,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),21249.0,39093.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",8,@L-Ile/aIle,L,ile,"10,9,8,7,6,5,4,3,2*1,0",8,Ile,,L-Ile +BGC0000305,orf00005,A4,FALSE,L,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),21249.0,39093.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",9,@L-Ile/aIle,L,ile,"10,9,8,7,6,5,4,3,2*1,0",9,Ile,,L-Ile +BGC0000305,orf00005,A5,FALSE,L,asp(100.0);asn(75.0);gln(75.0);glu(75.0);arg(55.0),21249.0,39093.0,+,BGC0000305,"#,0,1,2,3,4,5,6,7,8,9",10,@L-Asp,L,asp,"10,9,8,7,6,5,4,3,2*1,0",10,Asp,,L-Asp +BGC0000306,orf00001,A1,TRUE,D,ser(100.0);thr(65.0);hpg(65.0);dab(60.0);dpg(60.0),0.0,10458.0,-,BGC0000306,"#,0,1,2,3,4",0,@D-NMe-Ser,D,ser+MT,"0,1,2,3,4,5",0,Ser,, 
+BGC0000306,orf00001,A2,FALSE,D,ala(90.0);gly(80.0);leu(70.0);val(70.0);ala-d(65.0),0.0,10458.0,-,BGC0000306,"#,0,1,2,3,4",1,@D-Ala,D,ala,"0,1,2,3,4,5",1,Ala,, +BGC0000306,orf00003,A1,FALSE,L,gly(95.0);ala(70.0);ile(65.0);val(65.0);leu(60.0),12314.0,25208.0,+,BGC0000306,"#,0,1,2,3,4",2,Gly,NA,gly,"0,1,2,3,4,5",2,Gly,, +BGC0000306,orf00003,A2,TRUE,L,hpg(95.0);dpg(90.0);dhpg(80.0);dhp(80.0);ser(65.0),12314.0,25208.0,+,BGC0000306,"#,0,1,2,3,4",3,@L-NMe-Hpg,L,hpg+MT,"0,1,2,3,4,5",3,Hpg,, +BGC0000306,orf00003,A3,FALSE,L,ala(90.0);gly(80.0);leu(70.0);val(70.0);ala-d(65.0),12314.0,25208.0,+,BGC0000306,"#,0,1,2,3,4",4,@L-Ala,L,ala,"0,1,2,3,4,5",4,Ala/Gly,, +BGC0000306,orf00003,A4,FALSE,L,tyr(95.0);bht(95.0);phe(85.0);trp(70.0);uda(60.0),12314.0,25208.0,+,BGC0000306,"#,0,1,2,3,4",5,@L-Tyr,L,tyr,"0,1,2,3,4,5",5,Tyr,, +BGC0000307,orf00001,A1,FALSE,L,ile(65.0);leu(65.0);val(65.0);gly(60.0);ala(55.0),505.0,35485.0,+,BGC0000307,"#,0,1,2,3,4,5,6,7",-,-,-,-,"0,1,2,3,4,5,6,7,8*",0,Hmp,,DHmp +BGC0000307,orf00001,A2,TRUE,L,val(95.0);leu(75.0);ala(70.0);pro(70.0);abu(70.0),505.0,35485.0,+,BGC0000307,"#,0,1,2,3,4,5,6,7",1,@L-NMe-Val,L,val+MT,"0,1,2,3,4,5,6,7,8*",1,Val,Methylation,MeVal +BGC0000307,orf00001,A3,FALSE,L,val(55.0);ala(50.0);leu(50.0);ser(50.0);abu(50.0),505.0,35485.0,+,BGC0000307,"#,0,1,2,3,4,5,6,7",2,@L-Phe,L,phe,"0,1,2,3,4,5,6,7,8*",2,Phe,,Phe +BGC0000307,orf00001,A4,TRUE,L,val(55.0);ala(50.0);leu(50.0);ser(50.0);abu(50.0),505.0,35485.0,+,BGC0000307,"#,0,1,2,3,4,5,6,7",3,@L-NMe-Phe,L,phe+MT,"0,1,2,3,4,5,6,7,8*",3,Phe,Methylation,MePhe +BGC0000307,orf00001,A5,FALSE,L,dab(60.0);aeo(40.0);apa(40.0);ser(35.0);ala(30.0),505.0,35485.0,+,BGC0000307,"#,0,1,2,3,4,5,6,7",4,@L-Pro,L,pro,"0,1,2,3,4,5,6,7,8*",4,Pro,,Pro +BGC0000307,orf00001,A6,FALSE,L,leu(80.0);ile(75.0);val(75.0);ala(65.0);gly(65.0),505.0,35485.0,+,BGC0000307,"#,0,1,2,3,4,5,6,7",5,@D-Ile/aIle,D,ile,"0,1,2,3,4,5,6,7,8*",5,alle,,L-allo-Ile 
+BGC0000307,orf00001,A7,TRUE,L,val(95.0);leu(75.0);ala(70.0);pro(70.0);abu(70.0),505.0,35485.0,+,BGC0000307,"#,0,1,2,3,4,5,6,7",6,@L-NMe-Val,L,val+MT,"0,1,2,3,4,5,6,7,8*",6,Val,Methylation,MeVal +BGC0000307,orf00001,A8,FALSE,L,leu(90.0);val(80.0);bmt(80.0);ala-d(70.0);ile(70.0),505.0,35485.0,+,BGC0000307,"#,0,1,2,3,4,5,6,7",7,@L-Leu,L,leu,"0,1,2,3,4,5,6,7,8*",7,Leu,,Leu +BGC0000307,orf00001,A9,TRUE,L,val(95.0);leu(75.0);ala(70.0);pro(70.0);abu(70.0),505.0,35485.0,+,BGC0000307,"#,0,1,2,3,4,5,6,7",8,@L-bOH-NMe-Val,L,val+MT+unk,"0,1,2,3,4,5,6,7,8*",8,Val,Methylation,L-OH-MeVal +BGC0000310,orf00003,A1,FALSE,L,ile(100.0);leu(80.0);val(75.0);ala(70.0);abu(70.0),1492.0,17218.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",0,Ile/aIle,NA,ile,"11,10,9,8,7,6,5*4,3,2,1,0",0,Ile,, +BGC0000310,orf00003,A2,FALSE,L,cys(100.0);ser(60.0);ala(55.0);gly(55.0);thr(50.0),1492.0,17218.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",1,Cys,NA,cys,"11,10,9,8,7,6,5*4,3,2,1,0",1,Cys,, +BGC0000310,orf00003,A3,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),1492.0,17218.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",2,Leu,NA,leu,"11,10,9,8,7,6,5*4,3,2,1,0",2,Leu,, +BGC0000310,orf00003,A4,FALSE,D,glu(100.0);gln(85.0);asp(75.0);uda(70.0);asn(65.0),1492.0,17218.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",3,Glu,NA,glu,"11,10,9,8,7,6,5*4,3,2,1,0",3,D-Leu,, +BGC0000310,orf00003,A5,FALSE,L,ile(100.0);leu(80.0);val(75.0);ala(70.0);abu(70.0),1492.0,17218.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",4,Ile/aIle,NA,ile,"11,10,9,8,7,6,5*4,3,2,1,0",4,Ile,, +BGC0000310,orf00004,A1,FALSE,L,lys(100.0);arg(75.0);orn(70.0);end(70.0);glu(55.0),17344.0,25153.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",5,Lys,NA,lys,"11,10,9,8,7,6,5*4,3,2,1,0",5,Lys,, +BGC0000310,orf00004,A2,FALSE,D,orn(100.0);dab(80.0);arg(75.0);lys(70.0);glu(55.0),17344.0,25153.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",6,Orn,NA,orn,"11,10,9,8,7,6,5*4,3,2,1,0",6,D-Orn,, 
+BGC0000310,orf00005,A1,FALSE,L,ile(100.0);leu(75.0);val(70.0);abu(70.0);ala(65.0),25257.0,44337.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",7,Ile/aIle,NA,ile,"11,10,9,8,7,6,5*4,3,2,1,0",7,Ile,, +BGC0000310,orf00005,A2,FALSE,D,phe(100.0);tyr(85.0);bht(85.0);trp(75.0);uda(60.0),25257.0,44337.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",8,Phe,NA,phe,"11,10,9,8,7,6,5*4,3,2,1,0",8,D-Phe,, +BGC0000310,orf00005,A3,FALSE,L,tyr(85.0);his(75.0);bht(70.0);phe(65.0);trp(65.0),25257.0,44337.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",9,His,NA,his,"11,10,9,8,7,6,5*4,3,2,1,0",9,His,, +BGC0000310,orf00005,A4,FALSE,D,asp(100.0);asn(85.0);gln(60.0);glu(60.0);lys(55.0),25257.0,44337.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",10,Asp,NA,asp,"11,10,9,8,7,6,5*4,3,2,1,0",10,D-Asp,, +BGC0000310,orf00005,A5,FALSE,L,asn(100.0);asp(85.0);glu(65.0);gln(60.0);aad(55.0),25257.0,44337.0,+,BGC0000310,"#,0,1,2,3,4,5,6,7,8,9,10",11,Asn,NA,asn,"11,10,9,8,7,6,5*4,3,2,1,0",11,Asn,, +BGC0000312,orf00028,A1,FALSE,L,hyv(90.0);gly(65.0);val(60.0);ala(55.0);leu(55.0),21244.0,30685.0,+,BGC0000312,"#,0",0,Hiv,NA,hiv,"0,1,2,3,4,5,6,7*",0,D-Hiv (from L-val),, +BGC0000312,orf00028,A1,FALSE,L,hyv(90.0);gly(65.0);val(60.0);ala(55.0);leu(55.0),21244.0,30685.0,+,BGC0000312,"#,0",0,Hiv,NA,hiv,"0,1,2,3,4,5,6,7*",2,D-Hiv (from L-val),, +BGC0000312,orf00028,A1,FALSE,L,hyv(90.0);gly(65.0);val(60.0);ala(55.0);leu(55.0),21244.0,30685.0,+,BGC0000312,"#,0",0,Hiv,NA,hiv,"0,1,2,3,4,5,6,7*",4,D-Hiv (from L-val),, +BGC0000312,orf00028,A1,FALSE,L,hyv(90.0);gly(65.0);val(60.0);ala(55.0);leu(55.0),21244.0,30685.0,+,BGC0000312,"#,0",0,Hiv,NA,hiv,"0,1,2,3,4,5,6,7*",6,D-Hiv (from L-val),, +BGC0000312,orf00028,A2,TRUE,L,vol(60.0);cha(60.0);met(50.0);dht(50.0);bmt(50.0),21244.0,30685.0,+,BGC0000312,"#,0",1,NMe-Leu,NA,leu+MT,"0,1,2,3,4,5,6,7*",1,N-Me-Leu,, +BGC0000312,orf00028,A2,TRUE,L,vol(60.0);cha(60.0);met(50.0);dht(50.0);bmt(50.0),21244.0,30685.0,+,BGC0000312,"#,0",1,NMe-Leu,NA,leu+MT,"0,1,2,3,4,5,6,7*",3,N-Me-Leu,, 
+BGC0000312,orf00028,A2,TRUE,L,vol(60.0);cha(60.0);met(50.0);dht(50.0);bmt(50.0),21244.0,30685.0,+,BGC0000312,"#,0",1,NMe-Leu,NA,leu+MT,"0,1,2,3,4,5,6,7*",5,N-Me-Leu,, +BGC0000312,orf00028,A2,TRUE,L,vol(60.0);cha(60.0);met(50.0);dht(50.0);bmt(50.0),21244.0,30685.0,+,BGC0000312,"#,0",1,NMe-Leu,NA,leu+MT,"0,1,2,3,4,5,6,7*",7,N-Me-Leu,, +BGC0000323,-,-,,,,,,,BGC0000323,"#,0,1,2,3,4,5,6,7",0,@L-His,L,his,,,,, +BGC0000323,orf00003,A1,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),1141.0,10639.0,+,BGC0000323,"#,0,1,2,3,4,5,6,7",1,@L-Leu,L,leu,"0,1,2,3,4,5,6,7",0,Leu,, +BGC0000323,orf00003,A2,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),1141.0,10639.0,+,BGC0000323,"#,0,1,2,3,4,5,6,7",2,@L-Leu,L,leu,"0,1,2,3,4,5,6,7",1,Leu,, +BGC0000323,orf00003,A3,FALSE,D,asp(75.0);gln(75.0);glu(75.0);asn(70.0);arg(60.0),1141.0,10639.0,+,BGC0000323,"#,0,1,2,3,4,5,6,7",3,@L-Gln,L,gln,"0,1,2,3,4,5,6,7",2,Gln,, +BGC0000323,orf00004,A1,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),10635.0,28374.0,+,BGC0000323,"#,0,1,2,3,4,5,6,7",4,@L-Leu,L,leu,"0,1,2,3,4,5,6,7",3,Leu,, +BGC0000323,orf00004,A2,FALSE,D,asp(80.0);asn(75.0);gln(75.0);glu(75.0);arg(60.0),10635.0,28374.0,+,BGC0000323,"#,0,1,2,3,4,5,6,7",5,@L-Gln,L,gln,"0,1,2,3,4,5,6,7",4,Gln,, +BGC0000323,orf00004,A3,FALSE,L,val(85.0);ile(80.0);leu(75.0);abu(75.0);ala(65.0),10635.0,28374.0,+,BGC0000323,"#,0,1,2,3,4,5,6,7",6,@L-Val,L,val,"0,1,2,3,4,5,6,7",5,Val,, +BGC0000323,orf00004,A4,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),10635.0,28374.0,+,BGC0000323,"#,0,1,2,3,4,5,6,7",7,@L-Leu,L,leu,"0,1,2,3,4,5,6,7",6,Leu,, +BGC0000323,orf00004,A5,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),10635.0,28374.0,+,BGC0000323,"#,0,1,2,3,4,5,6,7",8,@L-Leu,L,leu,"0,1,2,3,4,5,6,7",7,Leu,, +BGC0000330,orf00008,A1,FALSE,L,gly(95.0);ala(75.0);ile(70.0);leu(65.0);val(65.0),8970.0,12198.0,-,BGC0000330,"#,0,1,2,3",3,Gly,NA,gly,"0,1,2,3,4",3,Gly,, 
+BGC0000330,orf00010,A1,FALSE,D,gln(65.0);glu(65.0);orn(65.0);asn(60.0);asp(60.0),13216.0,19243.0,-,BGC0000330,"#,0,1,2,3",4,@L-OH-Orn,L,orn+unk,"0,1,2,3,4",4,L-N-hydroxy-N-(3-hydroxybutyryl)-ornithine,, +BGC0000330,orf00011,A1,FALSE,L,asp(95.0);asn(85.0);gln(60.0);glu(60.0);lys(50.0),19280.0,22244.0,-,BGC0000330,"#,0,1,2,3",2,@L-OH-Asp,L,asp+unk,"0,1,2,3,4",2,L-Asp,, +BGC0000330,orf00012,A1,FALSE,L,asp(100.0);asn(90.0);glu(65.0);gln(60.0);cha(60.0),22368.0,31653.0,-,BGC0000330,"#,0,1,2,3",0,@L-OH-Asp,L,asp+unk,"0,1,2,3,4",0,L-Asp,, +BGC0000330,orf00012,A2,FALSE,L,arg(50.0);asp(50.0);ser(50.0);orn(50.0);dab(50.0),22368.0,31653.0,-,BGC0000330,"#,0,1,2,3",1,@L-Dab,L,dab,"0,1,2,3,4",1,L-Dab,, +BGC0000332,orf00003,A1,FALSE,L,gln(100.0);glu(80.0);asp(65.0);asn(60.0);arg(55.0),3991.0,7249.0,+,BGC0000332,"#,0,1,2,3,4,5",0,@L-Ile/aIle,L,ile,"6,5,4,3,2,1*0",0,Acylated-Gln,, +BGC0000332,orf00004,A1,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),7263.0,10569.0,+,BGC0000332,"#,0,1,2,3,4,5",1,@L-aThr/Thr,L,thr,"6,5,4,3,2,1*0",1,Thr,, +BGC0000332,orf00005,A1,FALSE,L,arg(75.0);gln(75.0);lys(75.0);glu(70.0);uda(70.0),10565.0,24731.0,+,BGC0000332,"#,0,1,2,3,4,5",2,@L-Ile/aIle,L,ile,"6,5,4,3,2,1*0",2,Ile/Thr,, +BGC0000332,orf00005,A2,FALSE,L,ahp(100.0);gln(70.0);glu(70.0);asp(65.0);asn(60.0),10565.0,24731.0,+,BGC0000332,"#,0,1,2,3,4,5",3,@L-Glu,L,glu,"6,5,4,3,2,1*0",3,Ahp,, +BGC0000332,orf00005,A3,FALSE,L,phe(95.0);tyr(85.0);bht(80.0);trp(75.0);uda(70.0),10565.0,24731.0,+,BGC0000332,"#,0,1,2,3,4,5",4,@L-aThr/Thr,L,thr,"6,5,4,3,2,1*0",4,Phe,, +BGC0000332,orf00005,A4,TRUE,L,bht(95.0);tyr(90.0);phe(85.0);trp(65.0);vol(60.0),10565.0,24731.0,+,BGC0000332,"#,0,1,2,3,4,5",5,@L-NMe-Phe,L,phe+MT,"6,5,4,3,2,1*0",5,Tyr+CH3+Cl,, +BGC0000332,orf00007,A1,FALSE,L,ile(100.0);leu(85.0);val(85.0);abu(70.0);ala(65.0),26601.0,30858.0,+,BGC0000332,"#,0,1,2,3,4,5",6,@L-Glu,L,glu,"6,5,4,3,2,1*0",6,Gln/Ile,, 
+BGC0000336,orf00045,A1,FALSE,L,tyr(75.0);bht(70.0);phe(65.0);trp(65.0);phg(55.0),51567.0,69060.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",0,@L-Trp,L,trp,"12,11,10,9,8,7,6,5,4*3,2,1,0",0,Trp,, +BGC0000336,orf00045,A2,FALSE,D,asn(95.0);asp(90.0);gln(65.0);glu(65.0);orn(55.0),51567.0,69060.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",1,@D-Asn,D,asn,"12,11,10,9,8,7,6,5,4*3,2,1,0",1,D-Asn,, +BGC0000336,orf00045,A3,FALSE,L,asn(95.0);asp(95.0);gln(65.0);glu(65.0);orn(60.0),51567.0,69060.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",2,@L-Asp,L,asp,"12,11,10,9,8,7,6,5,4*3,2,1,0",2,Asp,, +BGC0000336,orf00045,A4,FALSE,L,thr(100.0);allothr(90.0);ser(80.0);dht(80.0);hpg(60.0),51567.0,69060.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",3,@L-aThr/Thr,L,thr,"12,11,10,9,8,7,6,5,4*3,2,1,0",3,Thr,, +BGC0000336,orf00045,A5,FALSE,L,gly(95.0);ala(70.0);ile(65.0);val(65.0);leu(60.0),51567.0,69060.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",4,Gly,NA,gly,"12,11,10,9,8,7,6,5,4*3,2,1,0",4,Gly,, +BGC0000336,orf00046,A1,FALSE,L,glu(65.0);arg(55.0);asp(55.0);gln(55.0);lys(50.0),69053.0,91073.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",5,@L-Orn,L,orn,"12,11,10,9,8,7,6,5,4*3,2,1,0",5,Orn,, +BGC0000336,orf00046,A2,FALSE,L,asn(95.0);asp(95.0);gln(65.0);glu(65.0);orn(60.0),69053.0,91073.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",6,@L-Asp,L,asp,"12,11,10,9,8,7,6,5,4*3,2,1,0",6,Asp,, +BGC0000336,orf00046,A3,FALSE,D,ala(85.0);gly(70.0);val(70.0);ala-d(65.0);ile(60.0),69053.0,91073.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",7,@D-Ala,D,ala,"12,11,10,9,8,7,6,5,4*3,2,1,0",7,D-Ala,, +BGC0000336,orf00046,A4,FALSE,L,asn(95.0);asp(95.0);gln(65.0);glu(65.0);orn(60.0),69053.0,91073.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",8,@L-Asp,L,asp,"12,11,10,9,8,7,6,5,4*3,2,1,0",8,Asp,, +BGC0000336,orf00046,A5,FALSE,L,gly(95.0);ala(65.0);leu(65.0);val(65.0);ala-d(55.0),69053.0,91073.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",9,Gly,NA,gly,"12,11,10,9,8,7,6,5,4*3,2,1,0",9,Gly,, 
+BGC0000336,orf00046,A6,FALSE,D,ser(95.0);thr(65.0);allothr(65.0);hpg(65.0);dab(60.0),69053.0,91073.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",10,@D-Ser,D,ser,"12,11,10,9,8,7,6,5,4*3,2,1,0",10,D-Ser,, +BGC0000336,orf00047,A1,FALSE,L,asn(75.0);asp(75.0);glu(60.0);lys(55.0);orn(50.0),91069.0,98209.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",11,@L-3Me-Glu,L,glu+MT,"12,11,10,9,8,7,6,5,4*3,2,1,0",11,mGlu,, +BGC0000336,orf00047,A2,FALSE,L,phe(80.0);trp(70.0);tyr(70.0);bht(70.0);bmt(70.0),91069.0,98209.0,+,BGC0000336,"#,0,1,2,3,4,5,6,7,8,9,10,11",12,@L-Kyn,L,kyn,"12,11,10,9,8,7,6,5,4*3,2,1,0",12,Kyn,, +BGC0000342,orf00001,A1,FALSE,L,hyv(100.0);ala(65.0);gly(60.0);val(60.0);ala-d(55.0),1020.0,10416.0,+,BGC0000342,"#,0,1,2,3,4",0,Hiv,NA,hiv,"0,1,2,3,4,5*",0,,, +BGC0000342,orf00001,A1,FALSE,L,hyv(100.0);ala(65.0);gly(60.0);val(60.0);ala-d(55.0),1020.0,10416.0,+,BGC0000342,"#,0,1,2,3,4",2,Hiv,NA,hiv,"0,1,2,3,4,5*",2,,, +BGC0000342,orf00001,A1,FALSE,L,hyv(100.0);ala(65.0);gly(60.0);val(60.0);ala-d(55.0),1020.0,10416.0,+,BGC0000342,"#,0,1,2,3,4",4,Hiv,NA,hiv,"0,1,2,3,4,5*",4,,, +BGC0000342,orf00001,A2,TRUE,L,ile(70.0);leu(70.0);val(70.0);ala(65.0);gly(65.0),1020.0,10416.0,+,BGC0000342,"#,0,1,2,3,4",1,@L-NMe-Val,L,val+MT,"0,1,2,3,4,5*",1,,, +BGC0000342,orf00001,A2,TRUE,L,ile(70.0);leu(70.0);val(70.0);ala(65.0);gly(65.0),1020.0,10416.0,+,BGC0000342,"#,0,1,2,3,4",3,@L-NMe-Val,L,val+MT,"0,1,2,3,4,5*",3,,, +BGC0000342,orf00001,A2,TRUE,L,ile(70.0);leu(70.0);val(70.0);ala(65.0);gly(65.0),1020.0,10416.0,+,BGC0000342,"#,0,1,2,3,4",5,@L-NMe-Val,L,val+MT,"0,1,2,3,4,5*",5,,, +BGC0000367,-,-,,,,,,,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",12,@L-Trp,L,trp,,,,, +BGC0000367,-,-,,,,,,,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",13,@D-Leu,D,leu,,,,, +BGC0000367,orf00009,A1,FALSE,L,leu(80.0);ala(75.0);gly(75.0);ile(75.0);val(65.0),10658.0,17549.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",0,@L-NFo-Val,L,val+unk,,,,, 
+BGC0000367,orf00009,A2,FALSE,D,ala(55.0);cys(55.0);gly(50.0);ile(45.0);pro(45.0),10658.0,17549.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",1,Gly,NA,gly,,,,, +BGC0000367,orf00010,A1,FALSE,L,cys(70.0);ala(55.0);pro(55.0);gly(50.0);ser(50.0),17619.0,40791.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",2,@L-Ala,L,ala,,,,, +BGC0000367,orf00010,A2,FALSE,D,leu(95.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),17619.0,40791.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",3,@D-Leu,D,leu,,,,, +BGC0000367,orf00010,A3,FALSE,L,ala(100.0);gly(70.0);ala-d(65.0);ile(60.0);val(60.0),17619.0,40791.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",4,@L-Ala,L,ala,,,,, +BGC0000367,orf00010,A4,FALSE,D,val(100.0);ile(85.0);leu(80.0);abu(80.0);ala(65.0),17619.0,40791.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",5,@D-Val,D,val,,,,, +BGC0000367,orf00010,A5,FALSE,L,ala(100.0);gly(70.0);ala-d(65.0);ile(60.0);val(60.0),17619.0,40791.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",6,@L-Val,L,val,,,,, +BGC0000367,orf00010,A6,FALSE,D,val(100.0);ile(85.0);leu(80.0);abu(80.0);ala(65.0),17619.0,40791.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",7,@L-Val,L,val,,,,, +BGC0000367,orf00011,A1,FALSE,L,val(100.0);ile(85.0);leu(80.0);abu(80.0);ala(65.0),40795.0,59995.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",8,@L-Trp,L,trp,,,,, +BGC0000367,orf00011,A2,FALSE,D,val(100.0);ile(85.0);leu(80.0);abu(80.0);ala(65.0),40795.0,59995.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",9,@D-Leu,D,leu,,,,, +BGC0000367,orf00011,A3,FALSE,L,leu(75.0);val(70.0);ala(65.0);gly(65.0);ile(65.0),40795.0,59995.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",10,@L-Trp,L,trp,,,,, +BGC0000367,orf00011,A4,FALSE,D,leu(95.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),40795.0,59995.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",11,@D-Leu,D,leu,,,,, 
+BGC0000367,orf00011,A5,FALSE,L,arg(60.0);apa(60.0);ahp(60.0);orn(55.0);asn(50.0),40795.0,59995.0,+,BGC0000367,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14",14,@L-Trp,L,trp,,,,, +BGC0000374,orf00013,A1,FALSE,L,ala(80.0);gly(75.0);ala-d(70.0);leu(70.0);cys(65.0),11144.0,12728.0,+,BGC0000374.1,"#,0,1,2,3,4,5,6",-,-,-,-,,,,, +BGC0000374,orf00017,A1,FALSE,L,cha(60.0);met(50.0);vol(50.0);bmt(50.0);hty(50.0),15557.0,31262.0,+,BGC0000374.1,"#,0,1,2,3,4,5,6",1,@L-X1,L,none,,,,, +BGC0000374,orf00017,A2,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),15557.0,31262.0,+,BGC0000374.1,"#,0,1,2,3,4,5,6",2,@D-aThr/Thr,D,thr,,,,, +BGC0000374,orf00017,A3,FALSE,L,phe(75.0);trp(70.0);abu(55.0);leu(50.0);bmt(50.0),15557.0,31262.0,+,BGC0000374.1,"#,0,1,2,3,4,5,6",3,@L-bMe-Phe,L,phe+MT,,,,, +BGC0000374,orf00017,A4,FALSE,D,cha(60.0);met(50.0);vol(50.0);bmt(50.0);hty(50.0),15557.0,31262.0,+,BGC0000374.1,"#,0,1,2,3,4,5,6",4,@D-X1,D,none,,,,, +BGC0000374,orf00018,A1,FALSE,L,phe(75.0);trp(70.0);abu(55.0);leu(50.0);bmt(50.0),33715.0,41674.0,+,BGC0000374.1,"#,0,1,2,3,4,5,6",5,@L-bMe-Phe,L,phe+MT,,,,, +BGC0000374,orf00018,A2,FALSE,L,val(95.0);ile(80.0);abu(80.0);leu(70.0);ala(60.0),33715.0,41674.0,+,BGC0000374.1,"#,0,1,2,3,4,5,6",6,@L-Ile/aIle,L,ile,,,,, +BGC0000379,orf00018,A1,FALSE,L,ser(40.0);dab(40.0);hty(40.0);gua(40.0);cit(40.0),22787.0,25616.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",1,@L-Asp,L,asp,"10,9,8,7,6,5,4,3,2,1*0",0,Asp,, +BGC0000379,orf00025,A1,FALSE,L,asp(95.0);asn(85.0);glu(65.0);gln(60.0);hty(60.0),32711.0,42275.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",2,@L-Dpr,L,dpr,"10,9,8,7,6,5,4,3,2,1*0",1,Dap,, +BGC0000379,orf00025,A2,FALSE,D,pip(85.0);pro(70.0);ala(50.0);ile(50.0);leu(50.0),32711.0,42275.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",3,@L-Hpr,L,pip,"10,9,8,7,6,5,4,3,2,1*0",2,Pip,, +BGC0000379,orf00026,A1,FALSE,L,gly(100.0);ala(75.0);leu(65.0);val(65.0);ile(60.0),42271.0,62701.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",4,Gly,NA,gly,"10,9,8,7,6,5,4,3,2,1*0",3,Gly,, 
+BGC0000379,orf00026,A2,FALSE,L,asp(100.0);asn(85.0);glu(65.0);gln(60.0);hty(60.0),42271.0,62701.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",5,@L-Asp,L,asp,"10,9,8,7,6,5,4,3,2,1*0",4,Asp,, +BGC0000379,orf00026,A3,FALSE,L,gly(100.0);ala(75.0);leu(65.0);val(65.0);ile(60.0),42271.0,62701.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",6,Gly,NA,gly,"10,9,8,7,6,5,4,3,2,1*0",5,Gly,, +BGC0000379,orf00026,A4,FALSE,L,asp(100.0);asn(85.0);glu(65.0);gln(60.0);hty(60.0),42271.0,62701.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",7,@L-Asp,L,asp,"10,9,8,7,6,5,4,3,2,1*0",6,Asp,, +BGC0000379,orf00026,A5,FALSE,L,gly(100.0);ala(75.0);leu(65.0);val(65.0);ile(60.0),42271.0,62701.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",8,Gly,NA,gly,"10,9,8,7,6,5,4,3,2,1*0",7,Gly,, +BGC0000379,orf00026,A6,FALSE,D,thr(100.0);allothr(95.0);dht(90.0);ser(80.0);dhpg(60.0),42271.0,62701.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",9,@L-aThr/Thr,L,thr,"10,9,8,7,6,5,4,3,2,1*0",8,D-Thr,, +BGC0000379,orf00027,A1,FALSE,L,val(90.0);ile(80.0);leu(75.0);abu(75.0);ala(60.0),62700.0,69918.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",10,@L-Ile/aIle,L,ile,"10,9,8,7,6,5,4,3,2,1*0",9,Ile,, +BGC0000379,orf00027,A2,FALSE,L,pro(100.0);pip(80.0);uda(60.0);ala(55.0);ser(55.0),62700.0,69918.0,+,BGC0000379,"#,0,1,2,3,4,5,6,7,8,9,10",11,@L-Pro,L,pro,"10,9,8,7,6,5,4,3,2,1*0",10,Pro,, +BGC0000385,orf00066,A1,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),80786.0,92306.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",0,Leu,NA,leu,"10,9,8,7,6,5,4,3,2*1,0",0,D-Leu,, +BGC0000385,orf00066,A2,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),80786.0,92306.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",1,Leu,NA,leu,"10,9,8,7,6,5,4,3,2*1,0",1,Leu,, +BGC0000385,orf00066,A3,FALSE,L,ala(70.0);leu(70.0);val(70.0);gly(65.0);ile(60.0),80786.0,92306.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",2,Ph-Ser,NA,none,"10,9,8,7,6,5,4,3,2*1,0",2,hyPhe,, 
+BGC0000385,orf00066,A4,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),80786.0,92306.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",3,3OH-Leu,NA,leu+unk,"10,9,8,7,6,5,4,3,2*1,0",3,hyLeu,, +BGC0000385,orf00067,A1,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),92320.0,117916.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",4,Leu,NA,leu,"10,9,8,7,6,5,4,3,2*1,0",4,Leu,, +BGC0000385,orf00067,A2,FALSE,D,glu(70.0);asp(65.0);arg(60.0);asn(60.0);gln(60.0),92320.0,117916.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",5,Arg,NA,arg,"10,9,8,7,6,5,4,3,2*1,0",5,D-Arg,, +BGC0000385,orf00067,A3,FALSE,L,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),92320.0,117916.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",6,Ile/aIle,NA,ile,"10,9,8,7,6,5,4,3,2*1,0",6,Ile,, +BGC0000385,orf00067,A4,FALSE,L,thr(100.0);allothr(95.0);dht(90.0);ser(80.0);dhpg(60.0),92320.0,117916.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",7,aThr/Thr,NA,thr,"10,9,8,7,6,5,4,3,2*1,0",7,aThr,, +BGC0000385,orf00067,A5,FALSE,L,gly(95.0);ala(75.0);leu(70.0);ile(65.0);val(65.0),92320.0,117916.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",8,Gly,NA,gly,"10,9,8,7,6,5,4,3,2*1,0",8,Gly,, +BGC0000385,orf00067,A6,FALSE,L,asn(100.0);asp(85.0);gln(65.0);glu(65.0);arg(55.0),92320.0,117916.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",9,OH-Asn,NA,asn+unk,"10,9,8,7,6,5,4,3,2*1,0",9,hyAsn,, +BGC0000385,orf00067,A7,FALSE,L,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),92320.0,117916.0,+,BGC0000385,"#,0,1,2,3,4,5,6,7,8,9",10,Ser,NA,ser,"10,9,8,7,6,5,4,3,2*1,0",10,Ser,, +BGC0000388,-,-,,,,,,,BGC0000388,"0,1,2,3,4,5*",3,@L-bhEnd,L,none,,,,, +BGC0000388,orf00009,A1,FALSE,L,thr(90.0);ser(85.0);allothr(85.0);dht(70.0);ala(60.0),10101.0,18312.0,+,BGC0000388,"0,1,2,3,4,5*",5,@L-Ser,L,ser,"0,1,2,3,4,5*",0,Ser,, +BGC0000388,orf00009,A2,FALSE,L,ser(65.0);met(60.0);ala(50.0);ala-d(50.0);cys(50.0),10101.0,18312.0,+,BGC0000388,"0,1,2,3,4,5*",0,Gly,NA,gly,"0,1,2,3,4,5*",1,Gly,, 
+BGC0000388,orf00009,A3,FALSE,L,ser(60.0);dhpg(60.0);dhp(60.0);thr(55.0);allothr(55.0),10101.0,18312.0,+,BGC0000388,"0,1,2,3,4,5*",1,@L-bMe-Phe,L,phe+MT,"0,1,2,3,4,5*",2,Phe,, +BGC0000388,orf00010,A1,FALSE,D,tyr(100.0);bht(95.0);phe(85.0);trp(70.0);uda(60.0),18308.0,29315.0,+,BGC0000388,"0,1,2,3,4,5*",2,@D-Tyr,D,tyr,"0,1,2,3,4,5*",3,Tyr,, +BGC0000388,orf00010,A2,FALSE,D,ala(65.0);ala-d(65.0);gly(65.0);leu(65.0);ile(60.0),18308.0,29315.0,+,BGC0000388,"0,1,2,3,4,5*",4,@D-bhEnd,D,none,"0,1,2,3,4,5*",4,End,, +BGC0000388,orf00010,A2,FALSE,D,ala(65.0);ala-d(65.0);gly(65.0);leu(65.0);ile(60.0),18308.0,29315.0,+,BGC0000388,"0,1,2,3,4,5*",4,@D-bhEnd,D,none,"0,1,2,3,4,5*",5,End,, +BGC0000389,-,-,,,,,,,BGC0000389,"#,0,1,2,3,4,5,6,7",0,@L-Leu,L,leu,,,,, +BGC0000389,-,-,,,,,,,BGC0000389,"#,0,1,2,3,4,5,6,7",1,@D-Glu,D,glu,,,,, +BGC0000389,orf00001,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),1176.0,12924.0,+,BGC0000389,"#,0,1,2,3,4,5,6,7",2,@D-aThr/Thr,D,thr,"8,7,6,5,4,3,2*1,0",2,Thr,, +BGC0000389,orf00001,A2,FALSE,D,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),1176.0,12924.0,+,BGC0000389,"#,0,1,2,3,4,5,6,7",3,@D-Ile/aIle,D,ile,"8,7,6,5,4,3,2*1,0",3,Val,, +BGC0000389,orf00001,A3,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),1176.0,12924.0,+,BGC0000389,"#,0,1,2,3,4,5,6,7",4,@L-Leu,L,leu,"8,7,6,5,4,3,2*1,0",4,Leu,, +BGC0000389,orf00001,A4,FALSE,L,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),1176.0,12924.0,+,BGC0000389,"#,0,1,2,3,4,5,6,7",5,@D-Ser,D,ser,"8,7,6,5,4,3,2*1,0",5,Ser,, +BGC0000389,-,-,,,,,,,BGC0000389,"#,0,1,2,3,4,5,6,7",6,@L-Leu,L,leu,,,,, +BGC0000389,orf00002,A1,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),16490.0,24248.0,+,BGC0000389,"#,0,1,2,3,4,5,6,7",7,@D-Ser,D,ser,,,,, +BGC0000389,orf00002,A2,FALSE,L,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),16490.0,24248.0,+,BGC0000389,"#,0,1,2,3,4,5,6,7",8,@L-Ile/aIle,L,ile,,,,, 
+BGC0000397,orf00007,A1,FALSE,L,tyr(100.0);bht(95.0);phe(85.0);trp(70.0);uda(60.0),8495.0,19703.0,+,BGC0000397,"#,0,1,2,3,4,5",-,-,-,-,,,,, +BGC0000397,orf00007,A2,FALSE,L,gly(100.0);ala(75.0);leu(65.0);val(65.0);ile(60.0),8495.0,19703.0,+,BGC0000397,"#,0,1,2,3,4,5",1,Gly,NA,gly,,,,, +BGC0000397,orf00007,A3,FALSE,D,gln(100.0);glu(80.0);asp(65.0);asn(60.0);arg(55.0),8495.0,19703.0,+,BGC0000397,"#,0,1,2,3,4,5",2,@D-Gln,D,gln,,,,, +BGC0000397,orf00008,A1,FALSE,L,ile(100.0);leu(85.0);val(85.0);abu(70.0);ala(65.0),19699.0,34111.0,+,BGC0000397,"#,0,1,2,3,4,5",3,@L-Ile/aIle,L,ile,,,,, +BGC0000397,orf00008,A2,FALSE,L,ser(100.0);dab(70.0);hpg(65.0);thr(60.0);allothr(60.0),19699.0,34111.0,+,BGC0000397,"#,0,1,2,3,4,5",4,@L-Ser,L,ser,,,,, +BGC0000397,orf00008,A3,FALSE,L,pro(95.0);pip(70.0);gua(60.0);uda(60.0);ala(55.0),19699.0,34111.0,+,BGC0000397,"#,0,1,2,3,4,5",5,@L-4Me-Pro,L,pro+MT,,,,, +BGC0000397,orf00008,A4,FALSE,L,phe(90.0);trp(80.0);bht(80.0);bmt(80.0);tyr(75.0),19699.0,34111.0,+,BGC0000397,"#,0,1,2,3,4,5",-,-,-,-,,,,, +BGC0000399,orf00003,A1,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),2885.0,9251.0,+,BGC0000399.1,"#,0,1,2,3,4,5,6,7,8",0,@L-Leu,L,leu,"9,8,7,6,5,4,3,2*1,0",0,L-Leu,, +BGC0000399,orf00003,A2,FALSE,D,asp(90.0);glu(75.0);asn(70.0);gln(70.0);arg(55.0),2885.0,9251.0,+,BGC0000399.1,"#,0,1,2,3,4,5,6,7,8",1,@D-Glu,D,glu,"9,8,7,6,5,4,3,2*1,0",1,D-Glu/Asp,, +BGC0000399,orf00004,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),9568.0,22672.0,+,BGC0000399.1,"#,0,1,2,3,4,5,6,7,8",2,@D-aThr/Thr,D,thr,"9,8,7,6,5,4,3,2*1,0",2,D-allo Thr,, +BGC0000399,orf00004,A2,FALSE,D,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),9568.0,22672.0,+,BGC0000399.1,"#,0,1,2,3,4,5,6,7,8",3,@D-Ile/aIle,D,ile,"9,8,7,6,5,4,3,2*1,0",3,D-allo Ile,, +BGC0000399,orf00004,A3,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),9568.0,22672.0,+,BGC0000399.1,"#,0,1,2,3,4,5,6,7,8",4,@L-Leu,L,leu,"9,8,7,6,5,4,3,2*1,0",4,L-Leu,, 
+BGC0000399,orf00004,A4,FALSE,D,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),9568.0,22672.0,+,BGC0000399.1,"#,0,1,2,3,4,5,6,7,8",5,@D-Ser,D,ser,"9,8,7,6,5,4,3,2*1,0",5,D-Ser,, +BGC0000399,orf00005,A1,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),22668.0,37374.0,+,BGC0000399.1,"#,0,1,2,3,4,5,6,7,8",6,@L-Leu,L,leu,"9,8,7,6,5,4,3,2*1,0",6,L-Leu,, +BGC0000399,orf00005,A2,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),22668.0,37374.0,+,BGC0000399.1,"#,0,1,2,3,4,5,6,7,8",7,@L-Leu,L,leu,"9,8,7,6,5,4,3,2*1,0",7,L-Leu,, +BGC0000399,orf00005,A3,FALSE,D,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),22668.0,37374.0,+,BGC0000399.1,"#,0,1,2,3,4,5,6,7,8",8,@D-Ser,D,ser,"9,8,7,6,5,4,3,2*1,0",8,D-Ser,, +BGC0000399,orf00005,A4,FALSE,L,ile(90.0);val(85.0);leu(80.0);abu(75.0);ala(65.0),22668.0,37374.0,+,BGC0000399.1,"#,0,1,2,3,4,5,6,7,8",9,@L-Val,L,val,"9,8,7,6,5,4,3,2*1,0",9,L-Val,, +BGC0000407,orf00004,A1,FALSE,L,glu(100.0);gln(80.0);asp(70.0);bmt(70.0);asn(65.0),4372.0,12079.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",0,Glu,NA,glu,"7,6,5,4,3,2,1,0*8,9,#",9,Glu,, +BGC0000407,orf00004,A2,FALSE,D,orn(100.0);dab(90.0);lys(75.0);arg(70.0);gln(50.0),4372.0,12079.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",1,Orn,NA,orn,"7,6,5,4,3,2,1,0*8,9,#",8,Orn,, +BGC0000407,orf00005,A1,FALSE,L,tyr(100.0);trp(65.0);bht(65.0);phe(60.0);phg(50.0),12094.0,19786.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",2,Tyr,NA,tyr,"7,6,5,4,3,2,1,0*8,9,#",0,Tyr,, +BGC0000407,orf00005,A2,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),12094.0,19786.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",3,aThr/Thr,NA,thr,"7,6,5,4,3,2,1,0*8,9,#",7,Thr/Ser,, +BGC0000407,orf00006,A1,FALSE,L,glu(100.0);gln(80.0);asp(70.0);bmt(70.0);asn(65.0),19803.0,27471.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",4,Glu,NA,glu,"7,6,5,4,3,2,1,0*8,9,#",6,Glu,, 
+BGC0000407,orf00006,A2,FALSE,D,val(100.0);abu(80.0);ile(75.0);ala(70.0);leu(70.0),19803.0,27471.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",5,Ala,NA,ala,"7,6,5,4,3,2,1,0*8,9,#",5,Ala/Val/Aba,, +BGC0000407,orf00007,A1,FALSE,L,pro(100.0);pip(75.0);uda(60.0);ala(55.0);ala-d(50.0),27490.0,38347.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",6,Pro,NA,pro,"7,6,5,4,3,2,1,0*8,9,#",4,Pro,, +BGC0000407,orf00007,A2,FALSE,L,glu(100.0);gln(75.0);asn(65.0);asp(65.0);aad(60.0),27490.0,38347.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",7,Gln,NA,gln,"7,6,5,4,3,2,1,0*8,9,#",3,Gln,, +BGC0000407,orf00007,A3,FALSE,D,tyr(100.0);trp(65.0);bht(65.0);phe(60.0);phg(50.0),27490.0,38347.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",8,Tyr,NA,tyr,"7,6,5,4,3,2,1,0*8,9,#",2,Tyr,, +BGC0000407,orf00008,A1,FALSE,L,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(65.0),38375.0,42194.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",9,Ile/aIle,NA,ile,"7,6,5,4,3,2,1,0*8,9,#",1,Ile/Val,, +BGC0000407,orf00013,A1,FALSE,L,aad(60.0);asp(55.0);asn(50.0);glu(50.0);gln(45.0),46129.0,47779.0,+,BGC0000407,"#,0,1,2,3,4,5,6,7,8",-,-,-,-,,,,, +BGC0000416,-,-,,,,,,,BGC0000416.2,"#,0,1,2,3,4",0,@D-NMe-Val,D,val+MT,,,,, +BGC0000416,-,-,,,,,,,BGC0000416.2,"#,0,1,2,3,4",1,@D-NMe-Leu,D,leu+MT,,,,, +BGC0000416,orf00001,A1,TRUE,L,phe(65.0);tyr(65.0);bht(60.0);ala(55.0);trp(55.0),0.0,4683.0,+,BGC0000416.2,"#,0,1,2,3,4",2,@D-NMe-Val,D,val+MT,"0,1,2,",0,Leu,, +BGC0000416,orf00002,A1,TRUE,L,phe(65.0);tyr(65.0);bht(60.0);ala(55.0);trp(55.0),4679.0,9344.0,+,BGC0000416.2,"#,0,1,2,3,4",3,@D-NMe-Val,D,val+MT,"0,1,2,",1,Val,, +BGC0000416,orf00003,A1,TRUE,L,phe(65.0);tyr(65.0);bht(60.0);ala(55.0);trp(55.0),9340.0,15337.0,+,BGC0000416.2,"#,0,1,2,3,4",4,@D-NMe-Val,D,val+MT,"0,1,2,",2,Val,, +BGC0000424,orf00007,A1,FALSE,D,asp(100.0);asn(90.0);glu(65.0);gln(60.0);cha(60.0),6092.0,33575.0,+,BGC0000424.1,"#,0,1,2,3,4",0,OH-Asp,NA,asp+unk,"0,1,2,3,4,5",0,Asp,, 
+BGC0000424,orf00007,A2,FALSE,L,ser(100.0);dab(70.0);hpg(65.0);thr(60.0);allothr(60.0),6092.0,33575.0,+,BGC0000424.1,"#,0,1,2,3,4",1,Ser,NA,ser,"0,1,2,3,4,5",1,Ser,, +BGC0000424,orf00007,A3,FALSE,L,asp(100.0);asn(90.0);glu(65.0);gln(60.0);cha(60.0),6092.0,33575.0,+,BGC0000424.1,"#,0,1,2,3,4",2,OH-Asp,NA,asp+unk,"0,1,2,3,4,5",2,Asp,, +BGC0000424,orf00007,A4,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),6092.0,33575.0,+,BGC0000424.1,"#,0,1,2,3,4",3,aThr/Thr,NA,thr,"0,1,2,3,4,5",3,Thr,, +BGC0000424,orf00007,A5,FALSE,L,ser(100.0);dab(70.0);hpg(65.0);thr(60.0);allothr(60.0),6092.0,33575.0,+,BGC0000424.1,"#,0,1,2,3,4",4,Ser,NA,ser,"0,1,2,3,4,5",4,Ser,, +BGC0000424,orf00007,A6,FALSE,L,gln(70.0);asn(65.0);asp(65.0);glu(65.0);orn(60.0),6092.0,33575.0,+,BGC0000424.1,"#,0,1,2,3,4",-,-,-,-,"0,1,2,3,4,5",5,N^5-OH Orn,, +BGC0000425,orf00004,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),4805.0,24047.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",0,dhAbu,NA,abu+unk,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",0,x-dhAbu,,dhAbu +BGC0000425,orf00004,A2,FALSE,D,pro(95.0);pip(75.0);uda(60.0);ser(55.0);ala(50.0),4805.0,24047.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",1,Pro,NA,pro,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",1,D-Pro,, +BGC0000425,orf00004,A3,FALSE,D,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),4805.0,24047.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",2,@D-Ser,D,ser,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",2,D-Ser,, +BGC0000425,orf00004,A4,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),4805.0,24047.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",3,@D-Leu,D,leu,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",3,D-Leu,, +BGC0000425,orf00004,A5,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),4805.0,24047.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",4,@D-Val,D,val,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",4,D-Val,, 
+BGC0000425,orf00004,A6,FALSE,D,gln(95.0);glu(80.0);asp(65.0);asn(60.0);arg(55.0),4805.0,24047.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",5,@D-Gln,D,gln,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",5,D-Gln,, +BGC0000425,orf00005,A1,FALSE,D,leu(95.0);ile(75.0);val(70.0);abu(70.0);ala(65.0),24043.0,43444.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",6,@D-Leu,D,leu,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",6,D-Leu,, +BGC0000425,orf00005,A2,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),24043.0,43444.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",7,@D-Val,D,val,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",7,D-Val,, +BGC0000425,orf00005,A3,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),24043.0,43444.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",8,@L-Val,L,val,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",8,L-Val,, +BGC0000425,orf00005,A4,FALSE,D,gln(95.0);glu(80.0);asp(65.0);asn(60.0);arg(55.0),24043.0,43444.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",9,@D-Gln,D,gln,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",9,D-Gln,, +BGC0000425,orf00005,A5,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),24043.0,43444.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",10,@L-Leu,L,leu,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",10,L-Leu,, +BGC0000425,orf00005,A6,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),24043.0,43444.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",11,@D-Val,D,val,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",11,D-Val,, +BGC0000425,orf00006,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),43821.0,64995.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",12,dhAbu,NA,abu+unk,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",12,x-dhAbu,,dhAbu 
+BGC0000425,orf00006,A2,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),43821.0,64995.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",13,@D-aThr/Thr,D,thr,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",13,D-Thr,, +BGC0000425,orf00006,A3,FALSE,L,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),43821.0,64995.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",14,@L-Ile/aIle,L,ile,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",14,L-Ile,, +BGC0000425,orf00006,A4,FALSE,L,asn(65.0);asp(65.0);gln(60.0);glu(60.0);aad(60.0),43821.0,64995.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",15,@L-Hse,L,hse,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",15,L-Hse,, +BGC0000425,orf00006,A5,FALSE,D,dab(100.0);ala(65.0);ala-d(60.0);val(60.0);gly(55.0),43821.0,64995.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",16,@D-Dab,D,dab,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",16,D-Dab,, +BGC0000425,orf00006,A6,FALSE,L,bmt(50.0);end(50.0);cha(50.0);dab(40.0);hyv(40.0),43821.0,64995.0,+,BGC0000425,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",17,@L-Lys,L,lys,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17",17,L-Lys,, +BGC0000433,orf00011,A1,FALSE,L,glu(95.0);gln(90.0);asp(80.0);asn(65.0);arg(60.0),8557.0,19312.0,+,BGC0000433,"#,0,1,2,3,4,5",0,@L-Glu,L,glu,,,,, +BGC0000433,orf00011,A2,FALSE,L,leu(100.0);val(80.0);ile(75.0);ala(65.0);gly(65.0),8557.0,19312.0,+,BGC0000433,"#,0,1,2,3,4,5",1,@L-Leu,L,leu,,,,, +BGC0000433,orf00011,A3,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),8557.0,19312.0,+,BGC0000433,"#,0,1,2,3,4,5",2,@D-Leu,D,leu,,,,, +BGC0000433,orf00012,A1,FALSE,L,val(100.0);ile(80.0);abu(80.0);leu(75.0);ala(65.0),19333.0,30094.0,+,BGC0000433,"#,0,1,2,3,4,5",3,@L-Val,L,val,,,,, +BGC0000433,orf00012,A2,FALSE,L,asp(100.0);asn(85.0);gln(60.0);glu(60.0);lys(55.0),19333.0,30094.0,+,BGC0000433,"#,0,1,2,3,4,5",4,@L-Asp,L,asp,,,,, 
+BGC0000433,orf00012,A3,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),19333.0,30094.0,+,BGC0000433,"#,0,1,2,3,4,5",5,@D-Leu,D,leu,,,,, +BGC0000433,orf00013,A1,FALSE,L,leu(100.0);val(90.0);ile(85.0);gly(70.0);ala(65.0),30128.0,33965.0,+,BGC0000433,"#,0,1,2,3,4,5",6,@L-Leu,L,leu,,,,, +BGC0000437,orf00003,A1,FALSE,D,ser(100.0);hpg(70.0);dpg(65.0);thr(60.0);dab(60.0),2470.0,30844.0,-,BGC0000437,"#,0,1,2,3,4,5,6,7",0,@L-Ser,L,ser,"1,2,3,4,5,6,7,8,0*,#",0,Ser,, +BGC0000437,orf00003,A2,FALSE,D,ser(100.0);hpg(70.0);dpg(65.0);thr(60.0);dab(60.0),2470.0,30844.0,-,BGC0000437,"#,0,1,2,3,4,5,6,7",1,@D-Ser,D,ser,"1,2,3,4,5,6,7,8,0*,#",1,Ser,, +BGC0000437,orf00003,A3,FALSE,D,dab(100.0);ala(65.0);ala-d(60.0);val(60.0);gly(55.0),2470.0,30844.0,-,BGC0000437,"#,0,1,2,3,4,5,6,7",2,@D-Dab,D,dab,"1,2,3,4,5,6,7,8,0*,#",2,Dab,, +BGC0000437,orf00003,A4,FALSE,L,dab(100.0);ala(65.0);ala-d(60.0);val(60.0);gly(55.0),2470.0,30844.0,-,BGC0000437,"#,0,1,2,3,4,5,6,7",3,@L-Dab,L,dab,"1,2,3,4,5,6,7,8,0*,#",3,Dab,, +BGC0000437,orf00003,A5,FALSE,L,arg(100.0);lys(75.0);orn(70.0);dab(60.0);asp(50.0),2470.0,30844.0,-,BGC0000437,"#,0,1,2,3,4,5,6,7",4,@L-Arg,L,arg,"1,2,3,4,5,6,7,8,0*,#",4,Arg,, +BGC0000437,orf00003,A6,FALSE,L,phe(100.0);tyr(80.0);trp(70.0);bht(65.0);phg(55.0),2470.0,30844.0,-,BGC0000437,"#,0,1,2,3,4,5,6,7",5,@L-Phe,L,phe,"1,2,3,4,5,6,7,8,0*,#",5,Phe,, +BGC0000437,orf00003,A7,FALSE,D,thr(100.0);allothr(95.0);dht(90.0);ser(80.0);dhpg(60.0),2470.0,30844.0,-,BGC0000437,"#,0,1,2,3,4,5,6,7",6,dhAbu,NA,abu+unk,"1,2,3,4,5,6,7,8,0*,#",6,Dhb,, +BGC0000437,orf00003,A8,FALSE,L,asp(100.0);glu(80.0);gln(75.0);asn(70.0);cit(60.0),2470.0,30844.0,-,BGC0000437,"#,0,1,2,3,4,5,6,7",7,@L-OH-Asp,L,asp+unk,"1,2,3,4,5,6,7,8,0*,#",7,(3-OH)Asp,, +BGC0000437,orf00006,A1,FALSE,L,thr(100.0);allothr(90.0);ser(80.0);dht(80.0);hpg(60.0),33476.0,35321.0,-,BGC0000437,"#,0,1,2,3,4,5,6,7",8,@L-4Cl-Thr,L,thr+unk,"1,2,3,4,5,6,7,8,0*,#",8,(4-Cl)Thr,, 
+BGC0000438,-,-,,,,,,,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",3,@L-Ala,L,ala,,,,, +BGC0000438,-,-,,,,,,,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",4,@L-Ala,L,ala,,,,, +BGC0000438,-,-,,,,,,,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",6,@L-Leu,L,leu,,,,, +BGC0000438,orf00001,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),137.0,16277.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",0,dhAbu,NA,abu+unk,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",21,Dhb,, +BGC0000438,orf00001,A2,FALSE,D,pro(100.0);pip(80.0);uda(60.0);ala(55.0);ser(55.0),137.0,16277.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",1,@L-Pro,L,pro,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",20,Pro,, +BGC0000438,orf00001,A3,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),137.0,16277.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",2,@L-Val,L,val,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",19,Val,, +BGC0000438,orf00001,A4,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),137.0,16277.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",5,@L-Val,L,val,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",18,Val,, +BGC0000438,orf00001,A5,FALSE,D,ala(100.0);gly(70.0);ala-d(65.0);ile(60.0);val(60.0),137.0,16277.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",7,@L-Ala,L,ala,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",17,Ala,, +BGC0000438,orf00002,A1,FALSE,D,ala(100.0);gly(70.0);ala-d(65.0);ile(60.0);val(60.0),16279.0,32689.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",8,@L-Ala,L,ala,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",16,Ala,, 
+BGC0000438,orf00002,A2,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),16279.0,32689.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",9,dhAbu,NA,abu+unk,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",15,Val,, +BGC0000438,orf00002,A3,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),16279.0,32689.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",10,@L-Val,L,val,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",14,Val,, +BGC0000438,orf00002,A4,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),16279.0,32689.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",11,dhAbu,NA,abu+unk,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",13,Dhb,, +BGC0000438,orf00002,A5,FALSE,D,ala(100.0);gly(70.0);ala-d(65.0);ile(60.0);val(60.0),16279.0,32689.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",12,@L-Ala,L,ala,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",12,Ala,, +BGC0000438,orf00003,A1,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",13,@L-Val,L,val,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",11,Val,, +BGC0000438,orf00003,A10,FALSE,L,dab(100.0);ala(65.0);ala-d(60.0);val(60.0);gly(55.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",22,@L-Dab,L,dab,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",2,Dab,, +BGC0000438,orf00003,A11,FALSE,L,dab(100.0);ala(65.0);ala-d(60.0);val(60.0);gly(55.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",23,@L-Dab,L,dab,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",1,Dab,, 
+BGC0000438,orf00003,A12,FALSE,L,tyr(90.0);phe(80.0);trp(65.0);bht(65.0);phg(55.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",24,@L-Tyr,L,tyr,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",0,Tyr,, +BGC0000438,orf00003,A2,FALSE,D,ala(100.0);gly(70.0);ala-d(65.0);ile(60.0);val(60.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",14,@L-Ala,L,ala,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",10,Ala,, +BGC0000438,orf00003,A3,FALSE,D,ala(100.0);gly(70.0);ala-d(65.0);ile(60.0);val(60.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",15,@L-Ala,L,ala,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",9,Ala,, +BGC0000438,orf00003,A4,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",16,dhAbu,NA,abu+unk,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",8,Dhb,, +BGC0000438,orf00003,A5,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",17,@L-aThr/Thr,L,thr,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",7,aThr,, +BGC0000438,orf00003,A6,FALSE,D,ser(100.0);hpg(70.0);dpg(65.0);thr(60.0);dab(60.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",18,@L-Ser,L,ser,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",6,Ser,, +BGC0000438,orf00003,A7,FALSE,D,ala(100.0);gly(70.0);ala-d(65.0);ile(60.0);val(60.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",19,@L-Ala,L,ala,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",5,Ala,, 
+BGC0000438,orf00003,A8,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",20,@L-Val,L,val,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",4,Dhb,, +BGC0000438,orf00003,A9,FALSE,L,ala(100.0);gly(70.0);ala-d(65.0);ile(60.0);val(60.0),33114.0,73725.0,+,BGC0000438,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23",21,@L-Ala,L,ala,"0,1,2,3,4,5,6,7*,8,9,10,11,12,13,14,15,16,17,18,19,20,21,#",3,Ala,, +BGC0000439,orf00012,A1,FALSE,L,tyr(75.0);bht(75.0);ahp(60.0);met(50.0);dht(50.0),11571.0,28905.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",-,-,-,-,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",0,6-Cl-Trp,, +BGC0000439,orf00012,A2,FALSE,D,asn(85.0);asp(85.0);glu(65.0);gln(60.0);aad(50.0),11571.0,28905.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",1,@D-Asn,D,asn,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",1,Asp,, +BGC0000439,orf00012,A3,FALSE,L,asp(90.0);asn(85.0);gln(60.0);glu(60.0);hty(60.0),11571.0,28905.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",2,@L-Asp,L,asp,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",2,Asp,, +BGC0000439,orf00012,A4,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),11571.0,28905.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",3,@L-aThr/Thr,L,thr,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",3,Thr,, +BGC0000439,orf00012,A5,FALSE,L,gly(100.0);ala(75.0);leu(65.0);val(65.0);ile(60.0),11571.0,28905.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",4,Gly,NA,gly,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",4,Gly,, +BGC0000439,orf00013,A1,FALSE,L,glu(65.0);arg(55.0);asp(55.0);gln(55.0);lys(50.0),28901.0,50819.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",5,@L-Orn,L,orn,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",5,Orn,, +BGC0000439,orf00013,A2,FALSE,L,asp(90.0);asn(85.0);gln(60.0);glu(60.0);hty(60.0),28901.0,50819.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",6,@L-Asp,L,asp,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",6,Asp,, 
+BGC0000439,orf00013,A3,FALSE,D,ala(90.0);gly(75.0);ala-d(65.0);ser(60.0);ile(55.0),28901.0,50819.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",7,@D-Ala,D,ala,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",7,Ala,, +BGC0000439,orf00013,A4,FALSE,L,asp(90.0);asn(85.0);gln(60.0);glu(60.0);hty(60.0),28901.0,50819.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",8,@L-Asp,L,asp,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",8,Asp,, +BGC0000439,orf00013,A5,FALSE,L,gly(95.0);ala(70.0);ile(65.0);val(65.0);leu(60.0),28901.0,50819.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",9,Gly,NA,gly,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",9,Gly,, +BGC0000439,orf00013,A6,FALSE,D,ala(90.0);gly(80.0);leu(70.0);val(70.0);ala-d(65.0),28901.0,50819.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",10,@D-Ala,D,ala,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",10,Ala,, +BGC0000439,orf00014,A1,FALSE,L,asn(80.0);asp(80.0);glu(60.0);gln(50.0);lys(50.0),50820.0,57978.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",11,@L-3Me-Glu,L,glu+MT,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",11,Glu,, +BGC0000439,orf00014,A2,FALSE,L,phe(80.0);trp(70.0);tyr(70.0);bht(70.0);bmt(70.0),50820.0,57978.0,+,BGC0000439,"#,0,1,2,3,4,5,6,7,8,9,10,11",-,-,-,-,"12,11,10,9,8,7,6,5,4,3*,2,1,0,#",12,4-Cl-Kyn,, +BGC0000443,orf00007,A1,FALSE,L,ser(100.0);hpg(70.0);dpg(65.0);thr(60.0);dab(60.0),5608.0,21733.0,+,BGC0000443,"#,0,1,2,3,4,5,6,7",0,Ser,NA,ser,"0,1,2,3,4,5,6,7,8*",0,Ser,, +BGC0000443,orf00007,A2,FALSE,D,asp(70.0);cit(70.0);asn(65.0);gln(60.0);glu(60.0),5608.0,21733.0,+,BGC0000443,"#,0,1,2,3,4,5,6,7",1,X1,NA,none,"0,1,2,3,4,5,6,7,8*",1,Orn,, +BGC0000443,orf00007,A3,FALSE,L,asp(100.0);glu(80.0);gln(75.0);asn(70.0);cit(60.0),5608.0,21733.0,+,BGC0000443,"#,0,1,2,3,4,5,6,7",2,Asp,NA,asp,"0,1,2,3,4,5,6,7,8*",2,Asp,, +BGC0000443,orf00007,A4,FALSE,D,asn(65.0);asp(65.0);gln(60.0);glu(60.0);aad(60.0),5608.0,21733.0,+,BGC0000443,"#,0,1,2,3,4,5,6,7",3,Hse,NA,hse,"0,1,2,3,4,5,6,7,8*",3,Lys,, 
+BGC0000443,orf00007,A5,FALSE,L,leu(70.0);val(65.0);ala(60.0);gly(60.0);ile(60.0),5608.0,21733.0,+,BGC0000443,"#,0,1,2,3,4,5,6,7",4,X2,NA,none,"0,1,2,3,4,5,6,7,8*",4,His,, +BGC0000443,orf00008,A1,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),21736.0,35314.0,+,BGC0000443,"#,0,1,2,3,4,5,6,7",5,X0,NA,none,"0,1,2,3,4,5,6,7,8*",5,Thr,, +BGC0000443,orf00008,A2,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),21736.0,35314.0,+,BGC0000443,"#,0,1,2,3,4,5,6,7",6,dhAbu,NA,abu+unk,"0,1,2,3,4,5,6,7,8*",6,Dhb,, +BGC0000443,orf00008,A3,FALSE,L,asp(100.0);glu(80.0);gln(75.0);asn(70.0);cit(60.0),21736.0,35314.0,+,BGC0000443,"#,0,1,2,3,4,5,6,7",7,OH-Asp,NA,asp+unk,"0,1,2,3,4,5,6,7,8*",7,OH-Asp,, +BGC0000443,orf00009,A1,FALSE,L,thr(100.0);allothr(90.0);ser(80.0);dht(80.0);hpg(60.0),35639.0,37484.0,+,BGC0000443,"#,0,1,2,3,4,5,6,7",8,4Cl-Thr,NA,thr+unk,"0,1,2,3,4,5,6,7,8*",8,4Cl-Thr,, +BGC0000447,orf00004,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),4794.0,14454.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",0,dhAbu,NA,abu+unk,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",0,"2,3‐didehydrobutyrine",, +BGC0000447,orf00004,A2,FALSE,D,pro(95.0);pip(70.0);uda(60.0);ala(55.0);gly(50.0),4794.0,14454.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",1,@D-Pro,D,pro,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",1,D-Pro,, +BGC0000447,orf00004,A3,FALSE,D,ser(90.0);met(40.0);dab(40.0);dht(40.0);bmt(40.0),4794.0,14454.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",2,@L-Ser,L,ser,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",2,D-Ser,, +BGC0000447,orf00005,A1,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),14450.0,24293.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",3,@L-Leu,L,leu,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",3,D-Leu,, 
+BGC0000447,orf00005,A2,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),14450.0,24293.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",4,@D-Val,D,val,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",4,D-Val,, +BGC0000447,orf00005,A3,FALSE,D,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),14450.0,24293.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",5,@D-Ser,D,ser,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",5,D-Ser,, +BGC0000447,orf00006,A1,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),24289.0,34258.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",6,@D-Leu,D,leu,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",6,D-Leu,, +BGC0000447,orf00006,A2,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),24289.0,34258.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",7,@D-Val,D,val,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",7,D-Val,, +BGC0000447,orf00006,A3,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),24289.0,34258.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",8,@L-Val,L,val,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",8,L-Val/D-Val,, +BGC0000447,orf00007,A1,FALSE,D,gln(95.0);glu(80.0);asp(65.0);asn(60.0);arg(55.0),34215.0,44247.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",9,@D-Gln,D,gln,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",9,D-Gln,, +BGC0000447,orf00007,A2,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),34215.0,44247.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",10,@L-Leu,L,leu,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",10,L-Leu,, +BGC0000447,orf00007,A3,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),34215.0,44247.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",11,@D-Val,D,val,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",11,D-Val,, 
+BGC0000447,orf00008,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),44557.0,65716.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",12,dhAbu,NA,abu+unk,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",12,"2,3‐didehydrobutyrine",, +BGC0000447,orf00008,A2,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),44557.0,65716.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",13,@D-aThr/Thr,D,thr,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",13,D-allo-Thr,, +BGC0000447,orf00008,A3,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),44557.0,65716.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",14,@L-Ile/aIle,L,ile,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",14,L-Ile/L-Leu,, +BGC0000447,orf00008,A4,FALSE,L,asn(65.0);asp(65.0);gln(60.0);glu(60.0);aad(60.0),44557.0,65716.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",15,@L-Hse,L,hse,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",15,L-Hse,, +BGC0000447,orf00008,A5,FALSE,D,dab(100.0);ala(65.0);ala-d(60.0);val(60.0);gly(55.0),44557.0,65716.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",16,@D-Dab,D,dab,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",16,D-Dab,, +BGC0000447,orf00008,A6,FALSE,L,bmt(50.0);end(50.0);cha(50.0);dab(40.0);hyv(40.0),44557.0,65716.0,+,BGC0000447.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16",17,@L-Lys,L,lys,"17,16,15,14,13*,12,11,10,9,8,7,6,5,4,3,2,1,0",17,L-Lys,, +BGC0000452,orf00007,A1,FALSE,D,phe(100.0);tyr(80.0);bht(75.0);trp(70.0);uda(60.0),8543.0,11888.0,+,BGC0000452,"0,1,2,3,4,5,6,7,8,9*",4,@D-Phe,D,phe,"0,1,2,3,4,5,6,7,8,9*",0,D-Phe,, +BGC0000452,orf00008,A1,FALSE,L,pro(100.0);pip(75.0);uda(60.0);ala(55.0);ser(55.0),11982.0,22752.0,+,BGC0000452,"0,1,2,3,4,5,6,7,8,9*",5,@L-Pro,L,pro,"0,1,2,3,4,5,6,7,8,9*",1,Pro,, 
+BGC0000452,orf00008,A2,FALSE,L,phe(85.0);tyr(70.0);trp(65.0);bht(65.0);bmt(60.0),11982.0,22752.0,+,BGC0000452,"0,1,2,3,4,5,6,7,8,9*",6,@L-Phe,L,phe,"0,1,2,3,4,5,6,7,8,9*",2,Phe/Trp,, +BGC0000452,orf00008,A3,FALSE,D,phe(100.0);tyr(80.0);bht(75.0);trp(70.0);uda(60.0),11982.0,22752.0,+,BGC0000452,"0,1,2,3,4,5,6,7,8,9*",7,@D-Phe,D,phe,"0,1,2,3,4,5,6,7,8,9*",3,D-Phe/D-Trp,, +BGC0000452,orf00009,A1,FALSE,L,asn(100.0);asp(85.0);glu(65.0);gln(60.0);aad(55.0),22857.0,42321.0,+,BGC0000452,"0,1,2,3,4,5,6,7,8,9*",8,@L-Asn,L,asn,"0,1,2,3,4,5,6,7,8,9*",4,Asn,, +BGC0000452,orf00009,A2,FALSE,L,gln(100.0);glu(80.0);asp(65.0);asn(60.0);arg(55.0),22857.0,42321.0,+,BGC0000452,"0,1,2,3,4,5,6,7,8,9*",9,@L-Gln,L,gln,"0,1,2,3,4,5,6,7,8,9*",5,Gln,, +BGC0000452,orf00009,A3,FALSE,L,tyr(95.0);trp(70.0);phe(65.0);bht(65.0);phg(55.0),22857.0,42321.0,+,BGC0000452,"0,1,2,3,4,5,6,7,8,9*",0,@L-Tyr,L,tyr,"0,1,2,3,4,5,6,7,8,9*",6,Tyr,, +BGC0000452,orf00009,A4,FALSE,L,val(100.0);ile(80.0);abu(80.0);leu(75.0);ala(65.0),22857.0,42321.0,+,BGC0000452,"0,1,2,3,4,5,6,7,8,9*",1,@L-Val,L,val,"0,1,2,3,4,5,6,7,8,9*",7,Val,, +BGC0000452,orf00009,A5,FALSE,L,orn(100.0);dab(90.0);lys(75.0);arg(70.0);gln(50.0),22857.0,42321.0,+,BGC0000452,"0,1,2,3,4,5,6,7,8,9*",2,@L-Orn,L,orn,"0,1,2,3,4,5,6,7,8,9*",8,Orn/Lys,, +BGC0000452,orf00009,A6,FALSE,L,leu(100.0);ile(70.0);val(65.0);ala-d(60.0);gly(60.0),22857.0,42321.0,+,BGC0000452,"0,1,2,3,4,5,6,7,8,9*",3,@L-Leu,L,leu,"0,1,2,3,4,5,6,7,8,9*",9,Leu,, +BGC0000461,orf00003,A1,FALSE,L,glu(60.0);arg(55.0);asp(55.0);lys(55.0);orn(55.0),783.0,22974.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",7,Glu,NA,glu,"#,0,1,2,3,4,5,6,7,8,9,10,11*",7,Glu,, +BGC0000461,orf00003,A2,FALSE,D,asn(95.0);asp(80.0);gln(65.0);glu(65.0);aad(55.0),783.0,22974.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",8,Asn,NA,asn,"#,0,1,2,3,4,5,6,7,8,9,10,11*",8,Asn,, 
+BGC0000461,orf00003,A3,FALSE,D,leu(75.0);val(70.0);ala(65.0);gly(65.0);ile(65.0),783.0,22974.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",9,Trp,NA,trp,"#,0,1,2,3,4,5,6,7,8,9,10,11*",9,Trp,, +BGC0000461,orf00003,A4,FALSE,D,glu(65.0);arg(60.0);cit(60.0);asp(55.0);gln(55.0),783.0,22974.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",10,Orn,NA,orn,"#,0,1,2,3,4,5,6,7,8,9,10,11*",10,Orn,, +BGC0000461,orf00003,A5,TRUE,L,val(100.0);abu(90.0);ile(75.0);leu(70.0);ala(60.0),783.0,22974.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",11,NMe-Val,NA,val+MT,"#,0,1,2,3,4,5,6,7,8,9,10,11*",11,N-Me-Val,, +BGC0000461,orf00004,A1,FALSE,L,ser(100.0);thr(65.0);hpg(65.0);dab(60.0);dpg(60.0),22954.0,50827.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",0,Ser,NA,ser,"#,0,1,2,3,4,5,6,7,8,9,10,11*",0,Ser,, +BGC0000461,orf00004,A2,FALSE,D,asn(95.0);asp(80.0);gln(65.0);glu(65.0);aad(55.0),22954.0,50827.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",1,OH-Asn,NA,asn+unk,"#,0,1,2,3,4,5,6,7,8,9,10,11*",1,D-OHAsn/D-Asn,, +BGC0000461,orf00004,A3,FALSE,L,ser(100.0);thr(65.0);hpg(65.0);dab(60.0);dpg(60.0),22954.0,50827.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",2,Ser,NA,ser,"#,0,1,2,3,4,5,6,7,8,9,10,11*",2,Ser,, +BGC0000461,orf00004,A4,FALSE,L,gly(95.0);ala(75.0);ile(70.0);leu(65.0);val(65.0),22954.0,50827.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",3,Gly,NA,gly,"#,0,1,2,3,4,5,6,7,8,9,10,11*",3,Gly,, +BGC0000461,orf00004,A5,TRUE,D,phe(95.0);tyr(85.0);bht(80.0);trp(75.0);uda(70.0),22954.0,50827.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",4,NMe-Phe,NA,phe+MT,"#,0,1,2,3,4,5,6,7,8,9,10,11*",4,N-Me-Phe,, +BGC0000461,orf00004,A6,FALSE,L,phe(75.0);bht(75.0);tyr(70.0);trp(65.0);phg(60.0),22954.0,50827.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",5,Leu,NA,leu,"#,0,1,2,3,4,5,6,7,8,9,10,11*",5,Leu,, +BGC0000461,orf00004,A7,FALSE,D,glu(60.0);arg(55.0);asp(55.0);gln(55.0);lys(50.0),22954.0,50827.0,-,BGC0000461,"#,0,1,2,3,4,5,6,7,8,9,10",6,Orn,NA,orn,"#,0,1,2,3,4,5,6,7,8,9,10,11*",6,Orn,, 
+BGC0000463,-,-,,,,,,,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",0,Leu,NA,leu,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",0,Leu,, +BGC0000463,-,-,,,,,,,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",1,Glu,NA,glu,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",1,Glu,, +BGC0000463,orf00001,A1,FALSE,D,asp(80.0);asn(75.0);gln(75.0);glu(75.0);arg(60.0),0.0,25461.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",2,Gln,NA,gln,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",2,Gln,, +BGC0000463,orf00001,A2,FALSE,D,val(95.0);abu(80.0);ile(75.0);leu(75.0);ala(60.0),0.0,25461.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",3,Val,NA,val,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",3,Val,, +BGC0000463,orf00001,A3,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),0.0,25461.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",4,Leu,NA,leu,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",4,Leu,, +BGC0000463,orf00001,A4,FALSE,D,asp(80.0);asn(75.0);gln(75.0);glu(75.0);arg(60.0),0.0,25461.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",5,Gln,NA,gln,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",5,Gln,, +BGC0000463,orf00001,A5,FALSE,D,ser(95.0);hpg(70.0);thr(65.0);dpg(65.0);allothr(60.0),0.0,25461.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",6,Ser,NA,ser,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",6,Ser,, +BGC0000463,orf00001,A6,FALSE,D,val(95.0);abu(80.0);ile(75.0);leu(75.0);ala(60.0),0.0,25461.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",7,Val,NA,val,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",7,Val,, +BGC0000463,orf00001,A7,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),0.0,25461.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",8,Leu,NA,leu,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",8,Leu,, +BGC0000463,orf00001,A8,FALSE,D,asp(80.0);asn(75.0);gln(75.0);glu(75.0);arg(60.0),0.0,25461.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",9,Gln,NA,gln,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",9,Gln,, 
+BGC0000463,orf00002,A1,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),25457.0,39980.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",10,Leu,NA,leu,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",10,Leu,, +BGC0000463,orf00002,A2,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),25457.0,39980.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",11,Leu,NA,leu,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",11,Leu,, +BGC0000463,orf00002,A3,FALSE,D,asp(80.0);asn(75.0);gln(75.0);glu(75.0);arg(60.0),25457.0,39980.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",12,Gln,NA,gln,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",12,Gln,, +BGC0000463,orf00002,A4,FALSE,L,ile(90.0);leu(85.0);val(85.0);abu(70.0);ala(65.0),25457.0,39980.0,+,BGC0000463.1,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",13,Ile/aIle,NA,ile,"13,12,11,10,9,8,7,6*,5,4,3,2,1,0",13,Ile,, +BGC0000464,orf00008,A1,FALSE,L,pro(100.0);pip(75.0);uda(60.0);ala(55.0);ala-d(50.0),8762.0,31655.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",0,@L-Pro,L,pro,"12,11,10,9,8,7,6,5*,4,3,2,1,0",0,Pro,, +BGC0000464,orf00008,A2,FALSE,D,ala(90.0);gly(75.0);val(65.0);ala-d(60.0);ile(60.0),8762.0,31655.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",1,@D-Ala,D,ala,"12,11,10,9,8,7,6,5*,4,3,2,1,0",1,Ala,, +BGC0000464,orf00008,A3,FALSE,L,val(95.0);ile(80.0);abu(80.0);leu(75.0);ala(60.0),8762.0,31655.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",2,@L-Val,L,val,"12,11,10,9,8,7,6,5*,4,3,2,1,0",2,Val,, +BGC0000464,orf00008,A4,FALSE,D,phe(75.0);trp(75.0);bht(75.0);tyr(70.0);met(60.0),8762.0,31655.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",3,@D-Leu,D,leu,"12,11,10,9,8,7,6,5*,4,3,2,1,0",3,Ile,, +BGC0000464,orf00008,A5,FALSE,L,ile(85.0);leu(80.0);val(80.0);abu(70.0);gly(65.0),8762.0,31655.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",4,@L-Ile/aIle,L,ile,"12,11,10,9,8,7,6,5*,4,3,2,1,0",4,Leu,, 
+BGC0000464,orf00008,A6,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),8762.0,31655.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",5,@D-aThr/Thr,D,thr,"12,11,10,9,8,7,6,5*,4,3,2,1,0",5,Thr,, +BGC0000464,orf00008,A7,FALSE,L,val(95.0);ile(80.0);abu(80.0);leu(75.0);ala(60.0),8762.0,31655.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",6,@L-Val,L,val,"12,11,10,9,8,7,6,5*,4,3,2,1,0",6,Val,, +BGC0000464,orf00009,A1,FALSE,D,ile(85.0);leu(80.0);val(80.0);abu(70.0);gly(65.0),31669.0,38383.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",7,@D-Val,D,val,"12,11,10,9,8,7,6,5*,4,3,2,1,0",7,Val,, +BGC0000464,orf00009,A2,FALSE,L,val(95.0);ile(80.0);abu(80.0);leu(75.0);ala(60.0),31669.0,38383.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",8,@L-Val,L,val,"12,11,10,9,8,7,6,5*,4,3,2,1,0",8,Val,, +BGC0000464,orf00010,A1,FALSE,D,ala(100.0);gly(70.0);ala-d(65.0);ile(60.0);val(60.0),38397.0,53205.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",9,@D-Ala,D,ala,"12,11,10,9,8,7,6,5*,4,3,2,1,0",9,Ala,, +BGC0000464,orf00010,A2,FALSE,L,tyr(60.0);b-ala(60.0);trp(55.0);bht(55.0);phe(50.0),38397.0,53205.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",10,bAla,NA,b-ala,"12,11,10,9,8,7,6,5*,4,3,2,1,0",10,Ala,, +BGC0000464,orf00010,A3,FALSE,L,pro(100.0);pip(75.0);uda(60.0);ala(55.0);ala-d(50.0),38397.0,53205.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",11,@D-Pro,D,pro,"12,11,10,9,8,7,6,5*,4,3,2,1,0",11,Pro,, +BGC0000464,orf00010,A4,FALSE,L,val(95.0);ile(80.0);abu(80.0);leu(75.0);ala(60.0),38397.0,53205.0,+,BGC0000464.1,"#,0,1,2,3,4,5,6,7,8,9,10,11",12,@L-Val,L,val,"12,11,10,9,8,7,6,5*,4,3,2,1,0",12,Val,, +BGC0001127,-,-,,,,,,,BGC0001127,"#,0,1,2,3,4,5,6,7",0,dhAbu,NA,abu+unk,,,,, +BGC0001127,-,-,,,,,,,BGC0001127,"#,0,1,2,3,4,5,6,7",6,dhAbu,NA,abu+unk,,,,, +BGC0001127,orf00008,A1,FALSE,L,thr(95.0);allothr(95.0);dht(90.0);ser(75.0);hpg(60.0),6245.0,11117.0,+,BGC0001127,"#,0,1,2,3,4,5,6,7",1,@L-aThr/Thr,L,thr,,,,, 
+BGC0001127,orf00009,A1,FALSE,D,thr(95.0);allothr(95.0);dht(90.0);ser(75.0);hpg(60.0),11180.0,19943.0,+,BGC0001127,"#,0,1,2,3,4,5,6,7",2,@D-aThr/Thr,D,thr,,,,, +BGC0001127,orf00009,A2,FALSE,D,tyr(95.0);bht(90.0);phe(80.0);trp(65.0);uda(60.0),11180.0,19943.0,+,BGC0001127,"#,0,1,2,3,4,5,6,7",3,@D-Tyr,D,tyr,,,,, +BGC0001127,orf00010,A1,FALSE,L,thr(100.0);allothr(90.0);ser(80.0);dht(80.0);hpg(60.0),20029.0,30973.0,+,BGC0001127,"#,0,1,2,3,4,5,6,7",-,-,-,-,,,,, +BGC0001127,orf00010,A2,FALSE,D,gln(100.0);glu(80.0);asp(65.0);asn(60.0);arg(55.0),20029.0,30973.0,+,BGC0001127,"#,0,1,2,3,4,5,6,7",4,@D-Gln,D,gln,,,,, +BGC0001127,orf00010,A3,FALSE,L,gly(95.0);ala(70.0);leu(65.0);val(65.0);abu(60.0),20029.0,30973.0,+,BGC0001127,"#,0,1,2,3,4,5,6,7",5,Gly,NA,gly,,,,, +BGC0001127,orf00011,A1,FALSE,L,thr(100.0);allothr(90.0);ser(80.0);dht(80.0);hpg(60.0),30989.0,39770.0,+,BGC0001127,"#,0,1,2,3,4,5,6,7",7,@L-aThr/Thr,L,thr,,,,, +BGC0001127,orf00011,A2,FALSE,L,leu(70.0);val(65.0);ala(60.0);gly(60.0);ile(60.0),30989.0,39770.0,+,BGC0001127,"#,0,1,2,3,4,5,6,7",8,@L-His,L,his,,,,, +BGC0001131,orf00006,A1,FALSE,D,ser(100.0);hpg(65.0);thr(60.0);dab(60.0);dpg(60.0),7988.0,27977.0,+,BGC0001131,"0,1,2,3,4,5*",0,@D-Ser,D,ser,"0,1,2,3,4,5*",0,D-Ser,, +BGC0001131,orf00006,A2,FALSE,L,arg(75.0);gln(75.0);lys(75.0);glu(70.0);uda(70.0),7988.0,27977.0,+,BGC0001131,"0,1,2,3,4,5*",1,@L-Gln,L,gln,"0,1,2,3,4,5*",1,L-Gln,, +BGC0001131,orf00006,A3,FALSE,D,gly(65.0);ala(60.0);ile(60.0);leu(60.0);val(60.0),7988.0,27977.0,+,BGC0001131,"0,1,2,3,4,5*",2,@D-Phe,D,phe,"0,1,2,3,4,5*",2,D-Phe,, +BGC0001131,orf00006,A4,FALSE,L,phe(75.0);trp(75.0);tyr(70.0);bht(70.0);met(60.0),7988.0,27977.0,+,BGC0001131,"0,1,2,3,4,5*",3,@L-Leu,L,leu,"0,1,2,3,4,5*",3,L-Leu,, +BGC0001131,orf00006,A5,FALSE,D,gly(65.0);ala(60.0);ile(60.0);leu(60.0);val(60.0),7988.0,27977.0,+,BGC0001131,"0,1,2,3,4,5*",4,@D-Phe,D,phe,"0,1,2,3,4,5*",4,D-Phe,, 
+BGC0001131,orf00006,A6,FALSE,L,lys(65.0);arg(60.0);bmt(60.0);end(60.0);glu(55.0),7988.0,27977.0,+,BGC0001131,"0,1,2,3,4,5*",5,@L-Lys,L,lys,"0,1,2,3,4,5*",5,L-Lys,, +BGC0001133,orf00024,A1,FALSE,D,leu(85.0);ile(75.0);val(75.0);abu(70.0);ala(65.0),20000.0,31835.0,-,BGC0001133,"#,0,1,2,3,4,5",4,@D-Leu,D,leu,"0,1,2,3,4,5,6*",4,Leu,, +BGC0001133,orf00024,A2,FALSE,L,leu(85.0);ile(75.0);val(75.0);abu(70.0);ala(65.0),20000.0,31835.0,-,BGC0001133,"#,0,1,2,3,4,5",5,@L-Leu,L,leu,"0,1,2,3,4,5,6*",5,Leu,, +BGC0001133,orf00024,A3,FALSE,L,ser(100.0);hpg(65.0);thr(60.0);dpg(60.0);allothr(55.0),20000.0,31835.0,-,BGC0001133,"#,0,1,2,3,4,5",6,@L-Ala,L,ala,"0,1,2,3,4,5,6*",6,L-Ala,, +BGC0001133,orf00025,A1,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),31827.0,45888.0,-,BGC0001133,"#,0,1,2,3,4,5",0,@L-aThr/Thr,L,thr,"0,1,2,3,4,5,6*",0,L-Thr,, +BGC0001133,orf00025,A2,FALSE,L,ser(100.0);hpg(65.0);thr(60.0);dpg(60.0);allothr(55.0),31827.0,45888.0,-,BGC0001133,"#,0,1,2,3,4,5",1,@L-Ala,L,ala,"0,1,2,3,4,5,6*",1,L-Ala,, +BGC0001133,orf00025,A3,FALSE,L,leu(85.0);ile(75.0);val(75.0);abu(70.0);ala(65.0),31827.0,45888.0,-,BGC0001133,"#,0,1,2,3,4,5",2,@L-Leu,L,leu,"0,1,2,3,4,5,6*",2,Leu,, +BGC0001133,orf00025,A4,FALSE,D,leu(85.0);ile(75.0);val(75.0);abu(70.0);ala(65.0),31827.0,45888.0,-,BGC0001133,"#,0,1,2,3,4,5",3,@D-Leu,D,leu,"0,1,2,3,4,5,6*",3,Leu,, +BGC0001153,orf00001,A1,FALSE,D,phe(90.0);trp(65.0);uda(60.0);dab(50.0);bmt(50.0),8.0,14765.0,+,BGC0001153,"#,0,1,2,3,4,5,6,7,8",5,@D-Phe,D,phe,"9,8,7,6,5,4,3*,2,1,0",5,D-Ple,, +BGC0001153,orf00001,A2,FALSE,L,ile(90.0);ala(75.0);leu(75.0);val(75.0);gly(70.0),8.0,14765.0,+,BGC0001153,"#,0,1,2,3,4,5,6,7,8",6,@L-Leu,L,leu,"9,8,7,6,5,4,3*,2,1,0",6,L-Leu,, +BGC0001153,orf00001,A3,FALSE,L,dab(100.0);orn(90.0);arg(70.0);lys(65.0);uda(50.0),8.0,14765.0,+,BGC0001153,"#,0,1,2,3,4,5,6,7,8",7,@L-Dab,L,dab,"9,8,7,6,5,4,3*,2,1,0",7,Dab,, 
+BGC0001153,orf00001,A4,FALSE,L,dab(100.0);orn(90.0);arg(70.0);lys(65.0);uda(50.0),8.0,14765.0,+,BGC0001153,"#,0,1,2,3,4,5,6,7,8",8,@L-Dab,L,dab,"9,8,7,6,5,4,3*,2,1,0",8,Dab,, +BGC0001153,orf00002,A1,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),14919.0,18228.0,+,BGC0001153,"#,0,1,2,3,4,5,6,7,8",9,@L-aThr/Thr,L,thr,"9,8,7,6,5,4,3*,2,1,0",9,Thr,, +BGC0001153,orf00007,A1,FALSE,L,dab(100.0);orn(95.0);lys(70.0);arg(65.0);aeo(40.0),22064.0,40826.0,+,BGC0001153,"#,0,1,2,3,4,5,6,7,8",0,@L-Dab,L,dab,"9,8,7,6,5,4,3*,2,1,0",0,Dab,, +BGC0001153,orf00007,A2,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),22064.0,40826.0,+,BGC0001153,"#,0,1,2,3,4,5,6,7,8",1,@L-aThr/Thr,L,thr,"9,8,7,6,5,4,3*,2,1,0",1,Thr,, +BGC0001153,orf00007,A3,FALSE,D,dab(100.0);orn(95.0);lys(70.0);arg(65.0);aeo(40.0),22064.0,40826.0,+,BGC0001153,"#,0,1,2,3,4,5,6,7,8",2,@L-Dab,L,dab,"9,8,7,6,5,4,3*,2,1,0",2,L-Dab,, +BGC0001153,orf00007,A4,FALSE,L,dab(100.0);orn(90.0);arg(70.0);lys(65.0);uda(50.0),22064.0,40826.0,+,BGC0001153,"#,0,1,2,3,4,5,6,7,8",3,@L-Dab,L,dab,"9,8,7,6,5,4,3*,2,1,0",3,Dab,, +BGC0001153,orf00007,A5,FALSE,L,dab(100.0);orn(90.0);arg(70.0);lys(65.0);uda(50.0),22064.0,40826.0,+,BGC0001153,"#,0,1,2,3,4,5,6,7,8",4,@L-Dab,L,dab,"9,8,7,6,5,4,3*,2,1,0",4,Dab,, +BGC0001189,orf00005,A1,FALSE,L,asp(100.0);asn(90.0);glu(65.0);gln(60.0);cha(60.0),6173.0,9353.0,+,BGC0001189,"#,0,1,2,3,4",0,@L-OH-Asp,L,asp+unk,"0,1,2,3,4,5",0,Asp,,L-Asp +BGC0001189,orf00006,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),9377.0,17168.0,+,BGC0001189,"#,0,1,2,3,4",1,@D-aThr/Thr,D,thr,"0,1,2,3,4,5",1,alloThr,,D-allo-Thr +BGC0001189,orf00006,A2,FALSE,L,asp(100.0);asn(90.0);glu(65.0);gln(60.0);cha(60.0),9377.0,17168.0,+,BGC0001189,"#,0,1,2,3,4",2,@L-Asp,L,asp,"0,1,2,3,4,5",2,Asp,,L-Asp +BGC0001189,orf00007,A1,FALSE,D,orn(65.0);apa(60.0);ahp(60.0);arg(55.0);gln(50.0),17168.0,29036.0,+,BGC0001189,"#,0,1,2,3,4",3,@D-OH-Orn,D,orn+unk,"0,1,2,3,4,5",3,Orn,,D-Nδ-hydroxy-Orn 
+BGC0001189,orf00007,A2,FALSE,L,ser(100.0);thr(65.0);hpg(65.0);dab(60.0);dpg(60.0),17168.0,29036.0,+,BGC0001189,"#,0,1,2,3,4",4,@L-Ser,L,ser,"0,1,2,3,4,5",4,Ser,,L-Ser +BGC0001189,orf00007,A3,FALSE,L,orn(65.0);apa(60.0);ahp(60.0);arg(55.0);gln(50.0),17168.0,29036.0,+,BGC0001189,"#,0,1,2,3,4",-,-,-,-,"0,1,2,3,4,5",5,Orn,,L-Nδ-hydroxy-Orn +BGC0001192,orf00001,A1,FALSE,D,phe(80.0);leu(75.0);trp(75.0);pro(60.0);pip(60.0),0.0,14997.0,+,BGC0001192.1,"#,0,1,2,3,4,5,6,7,8",5,@D-Leu,D,leu,"9,8,7,6,5,4*,3,2,1,0",5,D-Leu,, +BGC0001192,orf00001,A2,FALSE,L,ile(100.0);leu(80.0);val(75.0);ala(70.0);abu(70.0),0.0,14997.0,+,BGC0001192.1,"#,0,1,2,3,4,5,6,7,8",6,@L-Leu,L,leu,"9,8,7,6,5,4*,3,2,1,0",6,L-Leu,, +BGC0001192,orf00001,A3,FALSE,L,dab(100.0);orn(90.0);arg(70.0);lys(65.0);uda(50.0),0.0,14997.0,+,BGC0001192.1,"#,0,1,2,3,4,5,6,7,8",7,@L-Dab,L,dab,"9,8,7,6,5,4*,3,2,1,0",7,L-Dab,, +BGC0001192,orf00001,A4,FALSE,L,dab(100.0);orn(90.0);arg(70.0);lys(65.0);uda(50.0),0.0,14997.0,+,BGC0001192.1,"#,0,1,2,3,4,5,6,7,8",8,@L-Dab,L,dab,"9,8,7,6,5,4*,3,2,1,0",8,L-Dab,, +BGC0001192,orf00002,A1,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),15149.0,18458.0,+,BGC0001192.1,"#,0,1,2,3,4,5,6,7,8",9,@L-aThr/Thr,L,thr,"9,8,7,6,5,4*,3,2,1,0",9,L-Thr,, +BGC0001192,orf00005,A1,FALSE,L,dab(100.0);orn(95.0);lys(70.0);arg(65.0);aeo(40.0),22233.0,41172.0,+,BGC0001192.1,"#,0,1,2,3,4,5,6,7,8",0,@L-Dab,L,dab,"9,8,7,6,5,4*,3,2,1,0",0,L-Dab,, +BGC0001192,orf00005,A2,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),22233.0,41172.0,+,BGC0001192.1,"#,0,1,2,3,4,5,6,7,8",1,@L-aThr/Thr,L,thr,"9,8,7,6,5,4*,3,2,1,0",1,L-Thr,, +BGC0001192,orf00005,A3,FALSE,D,orn(90.0);dab(90.0);lys(70.0);arg(65.0);met(40.0),22233.0,41172.0,+,BGC0001192.1,"#,0,1,2,3,4,5,6,7,8",2,@L-Dab,L,dab,"9,8,7,6,5,4*,3,2,1,0",2,D-Dab,, 
+BGC0001192,orf00005,A4,FALSE,L,dab(100.0);orn(90.0);arg(70.0);lys(65.0);uda(50.0),22233.0,41172.0,+,BGC0001192.1,"#,0,1,2,3,4,5,6,7,8",3,@L-Dab,L,dab,"9,8,7,6,5,4*,3,2,1,0",3,L-Dab,, +BGC0001192,orf00005,A5,FALSE,L,dab(100.0);orn(90.0);arg(70.0);lys(65.0);uda(50.0),22233.0,41172.0,+,BGC0001192.1,"#,0,1,2,3,4,5,6,7,8",4,@L-Dab,L,dab,"9,8,7,6,5,4*,3,2,1,0",4,L-Dab,, +BGC0001196,orf00010,A1,FALSE,L,asn(100.0);asp(90.0);glu(65.0);gln(60.0);cha(60.0),7767.0,15696.0,-,BGC0001196,"0,1,2,3,4,5*",0,@L-Asn,L,asn,"0,1,2,3,4,5,6*",4,L-Asn,, +BGC0001196,orf00010,A2,FALSE,D,gly(100.0);ala(75.0);leu(65.0);val(65.0);ile(60.0),7767.0,15696.0,-,BGC0001196,"0,1,2,3,4,5*",1,Gly,NA,gly,"0,1,2,3,4,5,6*",5,Gly,, +BGC0001196,orf00011,A1,FALSE,D,leu(80.0);ile(70.0);val(70.0);hty(70.0);ala(65.0),15837.0,22680.0,-,BGC0001196,"0,1,2,3,4,5*",4,@D-Leu,D,leu,"0,1,2,3,4,5,6*",2,D-Leu,, +BGC0001196,orf00011,A2,FALSE,L,val(85.0);leu(80.0);abu(75.0);ile(70.0);cha(70.0),15837.0,22680.0,-,BGC0001196,"0,1,2,3,4,5*",5,@L-Ile/aIle,L,ile,"0,1,2,3,4,5,6*",3,L-allo-Ile/L-Val,, +BGC0001196,orf00012,A1,FALSE,L,ahp(70.0);apa(60.0);leu(55.0);ala(50.0);met(50.0),23189.0,30242.0,-,BGC0001196,"0,1,2,3,4,5*",2,@L-Trp,L,trp,"0,1,2,3,4,5,6*",0,L-Trp,, +BGC0001196,orf00012,A2,FALSE,D,leu(80.0);ile(70.0);val(70.0);hty(70.0);ala(65.0),23189.0,30242.0,-,BGC0001196,"0,1,2,3,4,5*",3,@L-Leu,L,leu,"0,1,2,3,4,5,6*",1,L-Leu,, +BGC0001207,orf00006,A1,TRUE,D,ala(70.0);phe(70.0);val(70.0);gly(65.0);leu(65.0),8135.0,27245.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",0,@D-NMe-Phe,D,phe+MT,"10,9,8,7*,6,5,4,3,2,1,0",0,Nm-Phe,N-methylated, +BGC0001207,orf00006,A2,FALSE,L,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),8135.0,27245.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",1,@L-Ile/aIle,L,ile,"10,9,8,7*,6,5,4,3,2,1,0",1,Ile,, +BGC0001207,orf00006,A3,FALSE,L,ser(100.0);thr(65.0);hpg(65.0);allothr(60.0);dpg(60.0),8135.0,27245.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",2,@L-Ser,L,ser,"10,9,8,7*,6,5,4,3,2,1,0",2,Ser,, 
+BGC0001207,orf00006,A4,FALSE,D,arg(75.0);gln(70.0);glu(70.0);lys(70.0);orn(65.0),8135.0,27245.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",3,@D-Gln,D,gln,"10,9,8,7*,6,5,4,3,2,1,0",3,Gln,, +BGC0001207,orf00006,A5,FALSE,D,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),8135.0,27245.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",4,@D-Ile/aIle,D,ile,"10,9,8,7*,6,5,4,3,2,1,0",4,Ile,, +BGC0001207,orf00006,A6,FALSE,L,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),8135.0,27245.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",5,@L-Ile/aIle,L,ile,"10,9,8,7*,6,5,4,3,2,1,0",5,Ile,, +BGC0001207,orf00007,A1,FALSE,L,ser(100.0);thr(65.0);hpg(65.0);allothr(60.0);dpg(60.0),27249.0,46377.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",6,@L-Ser,L,ser,"10,9,8,7*,6,5,4,3,2,1,0",6,Ser,, +BGC0001207,orf00007,A2,FALSE,D,thr(100.0);allothr(95.0);dht(90.0);ser(80.0);dhpg(60.0),27249.0,46377.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",7,@D-aThr/Thr,D,thr,"10,9,8,7*,6,5,4,3,2,1,0",7,Thr,, +BGC0001207,orf00007,A3,FALSE,L,ala(100.0);gly(75.0);ala-d(65.0);ile(60.0);leu(60.0),27249.0,46377.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",8,@L-Ala,L,ala,"10,9,8,7*,6,5,4,3,2,1,0",8,Ala,, +BGC0001207,orf00007,A4,FALSE,L,uda(60.0);ala(50.0);leu(50.0);pro(50.0);val(50.0),27249.0,46377.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",9,@L-End,L,none,"10,9,8,7*,6,5,4,3,2,1,0",9,End,, +BGC0001207,orf00007,A5,FALSE,L,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),27249.0,46377.0,+,BGC0001207,"#,0,1,2,3,4,5,6,7,8,9",10,@L-Ile/aIle,L,ile,"10,9,8,7*,6,5,4,3,2,1,0",10,Ile,, +BGC0001214,-,-,,,,,,,BGC0001214.1,"#,0,1,2,3,4,5",4,@L-Piz,L,piperazic,,,,, +BGC0001214,orf00005,A1,FALSE,L,val(90.0);abu(80.0);leu(75.0);ile(70.0);cha(70.0),5164.0,16174.0,+,BGC0001214.1,"#,0,1,2,3,4,5",0,@L-NFo-Val,L,val+unk,"6,5,4,3,2,1*,0",0,L-Val/L-allo-Ile,, +BGC0001214,orf00005,A2,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),5164.0,16174.0,+,BGC0001214.1,"#,0,1,2,3,4,5",1,@L-aThr/Thr,L,thr,"6,5,4,3,2,1*,0",1,L-Thr,, 
+BGC0001214,orf00005,A3,FALSE,D,phe(80.0);trp(70.0);tyr(70.0);bht(70.0);vol(60.0),5164.0,16174.0,+,BGC0001214.1,"#,0,1,2,3,4,5",2,@D-X0,D,none,"6,5,4,3,2,1*,0",2,O-Me-L-Tyr,, +BGC0001214,orf00006,A1,FALSE,D,val(95.0);ile(90.0);leu(80.0);abu(75.0);ala(60.0),16155.0,20772.0,+,BGC0001214.1,"#,0,1,2,3,4,5",3,@D-Ile/aIle,D,ile,"6,5,4,3,2,1*,0",3,L-Val/L-allo-Ile,, +BGC0001214,orf00007,A1,FALSE,L,leu(80.0);ile(70.0);val(70.0);hty(70.0);ala(65.0),20774.0,31691.0,+,BGC0001214.1,"#,0,1,2,3,4,5",5,@L-Leu,L,leu,"6,5,4,3,2,1*,0",5,L-Leu,, +BGC0001214,orf00007,A2,TRUE,L,val(95.0);abu(85.0);ile(75.0);leu(70.0);ala(60.0),20774.0,31691.0,+,BGC0001214.1,"#,0,1,2,3,4,5",6,@L-NMe-Val,L,val+MT,"6,5,4,3,2,1*,0",6,L-Val,, +BGC0001214,orf00013,A1,FALSE,L,pro(90.0);pip(65.0);ala(50.0);ala-d(45.0);cys(45.0),35425.0,37030.0,+,BGC0001214.1,"#,0,1,2,3,4,5",-,-,-,-,"6,5,4,3,2,1*,0",4,L-pip,, +BGC0001220,orf00068,A1,FALSE,L,ala(75.0);dab(70.0);ala-d(65.0);pro(50.0);ser(50.0),56221.0,78301.0,+,BGC0001220,"0,1,2,3,4,5*",0,@L-X0,L,none,"0,1,2,3,4,5*",0,Orn,,"4R,5Rdihydroxy-L-Orn" +BGC0001220,orf00068,A2,FALSE,L,lys(50.0);uda(50.0);arg(45.0);gln(45.0);asn(40.0),56221.0,78301.0,+,BGC0001220,"0,1,2,3,4,5*",1,@L-aThr/Thr,L,thr,"0,1,2,3,4,5*",1,Thr,,L-Thr +BGC0001220,orf00068,A3,FALSE,L,met(50.0);dab(50.0);ahp(50.0);aeo(40.0);hse(40.0),56221.0,78301.0,+,BGC0001220,"0,1,2,3,4,5*",2,@L-4OH-Pro,L,pro+unk,"0,1,2,3,4,5*",2,Pro,,4R-hydroxyl-L-Pro +BGC0001220,orf00068,A4,FALSE,L,arg(50.0);asp(50.0);gln(50.0);lys(50.0);orn(50.0),56221.0,78301.0,+,BGC0001220,"0,1,2,3,4,5*",3,@L-X2,L,none,"0,1,2,3,4,5*",3,Tyr,,"3S,4S-dihydroxylL-homoTyr" +BGC0001220,orf00068,A5,FALSE,L,uda(60.0);arg(50.0);dab(50.0);vol(50.0);asp(45.0),56221.0,78301.0,+,BGC0001220,"0,1,2,3,4,5*",4,@L-aThr/Thr,L,thr,"0,1,2,3,4,5*",4,Thr,,L-Thr +BGC0001220,orf00068,A6,FALSE,L,gly(50.0);ala(45.0);pro(45.0);ser(45.0);val(45.0),56221.0,78301.0,+,BGC0001220,"0,1,2,3,4,5*",5,@L-X1,L,none,"0,1,2,3,4,5*",5,Pro,,"3S-hydroxyl,4Smethyl-L-Pro" 
+BGC0001233,orf00017,A1,FALSE,D,pro(65.0);pip(60.0);bmt(60.0);uda(60.0);cha(60.0),21743.0,38087.0,+,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",0,Hpg,NA,hpg,"0,1,2,3,4,5,6,7,8,9,10,11,12",0,Hpg,, +BGC0001233,orf00017,A2,FALSE,D,cha(70.0);pro(65.0);pip(60.0);bmt(60.0);leu(55.0),21743.0,38087.0,+,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",1,Dhpg,NA,dhpg,"0,1,2,3,4,5,6,7,8,9,10,11,12",1,Dpg,, +BGC0001233,orf00017,A3,FALSE,L,val(95.0);abu(85.0);ile(80.0);leu(75.0);ala(60.0),21743.0,38087.0,+,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",2,Val,NA,val,"0,1,2,3,4,5,6,7,8,9,10,11,12",2,Val,, +BGC0001233,orf00017,A4,FALSE,D,cha(70.0);pro(65.0);pip(60.0);bmt(60.0);leu(55.0),21743.0,38087.0,+,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",3,Dhpg,NA,dhpg,"0,1,2,3,4,5,6,7,8,9,10,11,12",3,Dpg,, +BGC0001233,orf00018,A1,FALSE,D,hpg(100.0);dpg(90.0);dhpg(80.0);dhp(80.0);ser(65.0),56647.0,61951.0,+,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",4,Hpg,NA,hpg,"0,1,2,3,4,5,6,7,8,9,10,11,12",4,Hpg,, +BGC0001233,orf00018,A2,-,-,cha(70.0);pro(65.0);pip(60.0);bmt(60.0);leu(55.0),,,,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",5,Dhpg,NA,dhpg,"0,1,2,3,4,5,6,7,8,9,10,11,12",5,Dpg,, +BGC0001233,orf00018,A3,-,-,hpg(95.0);dpg(85.0);dhpg(75.0);dhp(75.0);gly(55.0),,,,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",6,Hpg,NA,hpg,"0,1,2,3,4,5,6,7,8,9,10,11,12",6,Hpg,, +BGC0001233,orf00018,A4,-,-,cha(70.0);pro(65.0);pip(60.0);bmt(60.0);leu(55.0),,,,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",7,Dhpg,NA,dhpg,"0,1,2,3,4,5,6,7,8,9,10,11,12",7,Dpg,, +BGC0001233,orf00018,A5,-,-,val(95.0);abu(85.0);ile(80.0);leu(75.0);ala(60.0),,,,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",8,Val,NA,val,"0,1,2,3,4,5,6,7,8,9,10,11,12",8,Val,, +BGC0001233,orf00018,A6,-,-,pro(60.0);pip(60.0);bmt(60.0);cha(60.0);leu(55.0),,,,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",9,Dhpg,NA,dhpg,"0,1,2,3,4,5,6,7,8,9,10,11,12",9,Dpg,, 
+BGC0001233,orf00019,A1,FALSE,L,hpg(100.0);dpg(95.0);dhpg(85.0);dhp(85.0);leu(55.0),61952.0,72176.0,+,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",10,Hpg,NA,hpg,"0,1,2,3,4,5,6,7,8,9,10,11,12",10,Hpg,, +BGC0001233,orf00019,A2,FALSE,L,phe(100.0);tyr(85.0);bht(85.0);trp(80.0);bmt(60.0),61952.0,72176.0,+,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",11,Phe,NA,phe,"0,1,2,3,4,5,6,7,8,9,10,11,12",11,Phe,, +BGC0001233,orf00019,A3,FALSE,L,asp(100.0);asn(85.0);gln(60.0);glu(60.0);lys(55.0),61952.0,72176.0,+,BGC0001233,"#,0,1,2,3,4,5,6,7,8,9,10,11",12,Asp,NA,asp,"0,1,2,3,4,5,6,7,8,9,10,11,12",12,Asp,, +BGC0001290,orf00001,A1,FALSE,L,gly(80.0);ala(70.0);ala-d(65.0);tcl(40.0);uda(40.0),0.0,18009.0,-,BGC0001290.1,"#,0,1,2,3,4",1,Gly,NA,gly,"0,1,2,3,4*,$",0,Gly,, +BGC0001290,orf00001,A2,FALSE,L,gln(55.0);arg(50.0);glu(50.0);lys(50.0);orn(50.0),0.0,18009.0,-,BGC0001290.1,"#,0,1,2,3,4",2,@L-Val,L,val,"0,1,2,3,4*,$",1,Val,, +BGC0001290,orf00001,A3,FALSE,L,hty(60.0);gly(55.0);leu(55.0);ala(50.0);pro(50.0),0.0,18009.0,-,BGC0001290.1,"#,0,1,2,3,4",3,@L-Leu,L,leu,"0,1,2,3,4*,$",2,Leu,, +BGC0001290,orf00001,A4,FALSE,L,pro(85.0);cha(60.0);ala(55.0);pip(55.0);gly(50.0),0.0,18009.0,-,BGC0001290.1,"#,0,1,2,3,4",4,@L-Ala,L,ala,"0,1,2,3,4*,$",3,Ala,, +BGC0001290,orf00001,A5,FALSE,L,pro(70.0);ala(55.0);pip(55.0);gly(50.0);leu(50.0),0.0,18009.0,-,BGC0001290.1,"#,0,1,2,3,4",5,@L-Ala,L,ala,"0,1,2,3,4*,$",4,Ala,, +BGC0001312,-,-,,,,,,,BGC0001312,"#,0,1,2,3,4,5,6,7",0,@L-Leu,L,leu,,,,, +BGC0001312,-,-,,,,,,,BGC0001312,"#,0,1,2,3,4,5,6,7",1,@D-Glu,D,glu,,,,, +BGC0001312,orf00005,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),5733.0,18618.0,+,BGC0001312,"#,0,1,2,3,4,5,6,7",2,@D-aThr/Thr,D,thr,,,,, +BGC0001312,orf00005,A2,FALSE,D,val(100.0);abu(80.0);ile(75.0);leu(70.0);ala(65.0),5733.0,18618.0,+,BGC0001312,"#,0,1,2,3,4,5,6,7",3,@D-Val,D,val,,,,, 
+BGC0001312,orf00005,A3,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),5733.0,18618.0,+,BGC0001312,"#,0,1,2,3,4,5,6,7",4,@L-Leu,L,leu,,,,, +BGC0001312,orf00005,A4,FALSE,D,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),5733.0,18618.0,+,BGC0001312,"#,0,1,2,3,4,5,6,7",5,@D-Ser,D,ser,,,,, +BGC0001312,orf00006,A1,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),18614.0,29936.0,+,BGC0001312,"#,0,1,2,3,4,5,6,7",6,@L-Leu,L,leu,,,,, +BGC0001312,orf00006,A2,FALSE,D,ser(100.0);thr(70.0);hpg(65.0);allothr(60.0);dpg(60.0),18614.0,29936.0,+,BGC0001312,"#,0,1,2,3,4,5,6,7",7,@D-Ser,D,ser,,,,, +BGC0001312,orf00006,A3,FALSE,L,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),18614.0,29936.0,+,BGC0001312,"#,0,1,2,3,4,5,6,7",8,@L-Ile/aIle,L,ile,,,,, +BGC0001341,orf00004,A1,FALSE,L,ala(50.0);pro(45.0);val(45.0);cys(40.0);gly(40.0),2639.0,10637.0,-,BGC0001341,"#,0",0,Lac,NA,lac,,,,, +BGC0001341,orf00004,A2,FALSE,L,val(95.0);ile(80.0);abu(80.0);leu(75.0);ala(65.0),2639.0,10637.0,-,BGC0001341,"#,0",1,Val,NA,val,,,,, +BGC0001341,orf00005,A1,FALSE,L,tcl(60.0);ala(45.0);gly(45.0);pro(45.0);val(45.0),10642.0,20767.0,-,BGC0001341,"#,0",0,Hiv,NA,hiv,,,,, +BGC0001341,orf00005,A2,FALSE,D,val(100.0);ile(85.0);leu(80.0);abu(80.0);ala(65.0),10642.0,20767.0,-,BGC0001341,"#,0",1,@D-Val,D,val,,,,, +BGC0001346,orf00003,A1,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),2410.0,8821.0,+,BGC0001346.1,"#,0,1,2,3,4,5,6",0,Leu,NA,leu,,,,, +BGC0001346,orf00003,A2,FALSE,D,asp(100.0);asn(75.0);gln(75.0);glu(75.0);arg(55.0),2410.0,8821.0,+,BGC0001346.1,"#,0,1,2,3,4,5,6",1,Asp,NA,asp,,,,, +BGC0001346,orf00004,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),9071.0,22079.0,+,BGC0001346.1,"#,0,1,2,3,4,5,6",2,aThr/Thr,NA,thr,,,,, +BGC0001346,orf00004,A2,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),9071.0,22079.0,+,BGC0001346.1,"#,0,1,2,3,4,5,6",3,Ile/aIle,NA,ile,,,,, 
+BGC0001346,orf00004,A3,FALSE,D,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),9071.0,22079.0,+,BGC0001346.1,"#,0,1,2,3,4,5,6",4,Leu,NA,leu,,,,, +BGC0001346,orf00004,A4,FALSE,D,gln(80.0);glu(80.0);asp(75.0);asn(65.0);bmt(60.0),9071.0,22079.0,+,BGC0001346.1,"#,0,1,2,3,4,5,6",5,Gln,NA,gln,,,,, +BGC0001346,orf00005,A1,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),22075.0,30376.0,+,BGC0001346.1,"#,0,1,2,3,4,5,6",6,Leu,NA,leu,,,,, +BGC0001346,orf00005,A2,FALSE,L,ile(100.0);leu(80.0);val(80.0);abu(70.0);ala(60.0),22075.0,30376.0,+,BGC0001346.1,"#,0,1,2,3,4,5,6",7,Leu,NA,leu,,,,, +BGC0001370,orf00016,A1,FALSE,L,ser(100.0);thr(65.0);hpg(65.0);allothr(60.0);dpg(60.0),16354.0,38248.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",0,@L-Ser,L,ser,,,,, +BGC0001370,orf00016,A2,FALSE,L,thr(100.0);allothr(95.0);dht(90.0);ser(80.0);dhpg(60.0),16354.0,38248.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",1,@L-aThr/Thr,L,thr,,,,, +BGC0001370,orf00016,A3,FALSE,D,trp(100.0);phe(80.0);tyr(75.0);bht(70.0);bmt(60.0),16354.0,38248.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",2,@L-Trp,L,trp,,,,, +BGC0001370,orf00016,A4,FALSE,L,asp(100.0);asn(85.0);glu(65.0);gln(60.0);hty(60.0),16354.0,38248.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",3,@L-Asp,L,asp,,,,, +BGC0001370,orf00016,A5,FALSE,L,asp(100.0);asn(85.0);glu(65.0);gln(60.0);hty(60.0),16354.0,38248.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",4,@L-Asp,L,asp,,,,, +BGC0001370,orf00016,A6,FALSE,D,hpg(100.0);dpg(90.0);dhpg(80.0);dhp(80.0);ser(70.0),16354.0,38248.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",5,@D-Hpg,D,hpg,,,,, +BGC0001370,orf00017,A1,FALSE,L,asp(100.0);asn(85.0);glu(65.0);gln(60.0);hty(60.0),38244.0,49158.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",6,@L-Asp,L,asp,,,,, +BGC0001370,orf00017,A2,FALSE,L,gly(100.0);ala(75.0);leu(65.0);val(65.0);ile(60.0),38244.0,49158.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",7,Gly,NA,gly,,,,, 
+BGC0001370,orf00017,A3,FALSE,D,asn(100.0);asp(90.0);glu(65.0);gln(60.0);cha(60.0),38244.0,49158.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",8,@D-X0,D,none,,,,, +BGC0001370,orf00018,A1,FALSE,L,glu(75.0);asp(70.0);asn(65.0);gly(60.0);ile(60.0),49142.0,56366.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",9,@L-Glu,L,glu,,,,, +BGC0001370,orf00018,A2,FALSE,L,trp(100.0);phe(80.0);tyr(70.0);bht(70.0);phg(50.0),49142.0,56366.0,+,BGC0001370.1,"#,0,1,2,3,4,5,6,7,8,9",10,@L-Trp,L,trp,,,,, +BGC0001393,orf00008,A1,FALSE,L,val(90.0);ile(85.0);leu(80.0);abu(75.0);ala(70.0),5583.0,12708.0,+,BGC0001393,"0,1,2,3,4,5,6*",3,@D-Val,D,val,"0,1,2,3,4,5*",1,Val,, +BGC0001393,orf00008,A2,FALSE,L,val(60.0);thr(55.0);ala(50.0);gly(50.0);leu(50.0),5583.0,12708.0,+,BGC0001393,"0,1,2,3,4,5,6*",4,@L-Trp,L,trp,"0,1,2,3,4,5*",2,Trp,, +BGC0001393,orf00010,A1,FALSE,L,leu(85.0);val(75.0);ile(70.0);ala(65.0);gly(65.0),13224.0,16917.0,+,BGC0001393,"0,1,2,3,4,5,6*",5,@D-Leu,D,leu,"0,1,2,3,4,5*",3,Leu,, +BGC0001393,orf00011,A1,FALSE,L,val(95.0);ile(80.0);abu(80.0);leu(75.0);ala(65.0),16934.0,25748.0,+,BGC0001393,"0,1,2,3,4,5,6*",6,@L-Val,L,val,"0,1,2,3,4,5*",4,Val,, +BGC0001393,orf00011,A1,FALSE,L,val(95.0);ile(80.0);abu(80.0);leu(75.0);ala(65.0),16934.0,25748.0,+,BGC0001393,"0,1,2,3,4,5,6*",0,@D-Val,D,val,"0,1,2,3,4,5*",5,Val,, +BGC0001393,orf00011,A1,FALSE,L,val(95.0);ile(80.0);abu(80.0);leu(75.0);ala(65.0),16934.0,25748.0,+,BGC0001393,"0,1,2,3,4,5,6*",1,@L-Val,L,val,"0,1,2,3,4,5*",6,,, +BGC0001393,orf00013,A1,FALSE,L,cys(100.0);ser(60.0);ala(55.0);gly(55.0);thr(55.0),26468.0,28208.0,+,BGC0001393,"0,1,2,3,4,5,6*",2,@L-Cys,L,cys,"0,1,2,3,4,5*",0,Thiazolidine(Cys),, +BGC0001402,orf00002,A1,FALSE,L,pro(70.0);dab(60.0);ala(50.0);ser(50.0);cys(45.0),2579.0,19652.0,-,BGC0001402.1,"0,1,2,3,4*",4,@L-Ser,L,ser,"0,1,2,3,4*",0,Ser,, +BGC0001402,orf00002,A2,FALSE,L,gly(50.0);leu(50.0);ser(50.0);dab(50.0);ala(45.0),2579.0,19652.0,-,BGC0001402.1,"0,1,2,3,4*",3,@L-Abu,L,abu,"0,1,2,3,4*",1,Pro+??,, 
+BGC0001402,orf00002,A3,FALSE,L,leu(85.0);ile(75.0);val(75.0);ala(70.0);ala-d(70.0),2579.0,19652.0,-,BGC0001402.1,"0,1,2,3,4*",2,@L-Cl2-Pro,L,pro+unk,"0,1,2,3,4*",2,2-amino-butyric acid(был Thr),, +BGC0001402,orf00002,A4,FALSE,L,dab(60.0);orn(50.0);arg(45.0);asn(40.0);asp(40.0),2579.0,19652.0,-,BGC0001402.1,"0,1,2,3,4*",1,@L-Ser,L,ser,"0,1,2,3,4*",3,Ser,, +BGC0001402,orf00002,A5,FALSE,L,ala(50.0);gly(50.0);ile(50.0);pro(50.0);val(50.0),2579.0,19652.0,-,BGC0001402.1,"0,1,2,3,4*",0,@L-bPhe,L,none,"0,1,2,3,4*",4,beta-phenylalanin,, +BGC0001406,orf00020,A1,FALSE,L,asp(95.0);asn(85.0);gln(60.0);glu(60.0);hty(60.0),24833.0,39140.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",0,@L-Asp,L,asp,"10,9,8,7,6,5,4,3,2*,1,0",0,Asp,, +BGC0001406,orf00020,A2,FALSE,L,ser(95.0);hpg(65.0);thr(60.0);dpg(60.0);dhpg(55.0),24833.0,39140.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",1,@L-Ser,L,ser,"10,9,8,7,6,5,4,3,2*,1,0",1,Ser,, +BGC0001406,orf00020,A3,FALSE,L,thr(100.0);allothr(95.0);dht(90.0);ser(80.0);dhpg(60.0),24833.0,39140.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",2,@L-aThr/Thr,L,thr,"10,9,8,7,6,5,4,3,2*,1,0",2,Thr,, +BGC0001406,orf00020,A4,FALSE,D,thr(100.0);allothr(95.0);dht(90.0);ser(80.0);dhpg(60.0),24833.0,39140.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",3,@L-aThr/Thr,L,thr,"10,9,8,7,6,5,4,3,2*,1,0",3,allo-Thr,, +BGC0001406,orf00021,A1,FALSE,L,ser(95.0);thr(65.0);allothr(65.0);hpg(65.0);dab(60.0),39136.0,56611.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",4,@L-Ala,L,ala,"10,9,8,7,6,5,4,3,2*,1,0",4,Ala,, +BGC0001406,orf00021,A2,FALSE,L,gly(95.0);ala(75.0);ile(70.0);leu(65.0);val(65.0),39136.0,56611.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",5,Gly,NA,gly,"10,9,8,7,6,5,4,3,2*,1,0",5,Gly,, +BGC0001406,orf00021,A3,FALSE,L,pro(95.0);pip(75.0);ser(55.0);ala(50.0);cys(50.0),39136.0,56611.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",6,@L-3OH-Pro,L,pro+unk,"10,9,8,7,6,5,4,3,2*,1,0",6,trans-3-Hyp,, 
+BGC0001406,orf00021,A4,FALSE,L,hty(60.0);vol(50.0);bmt(50.0);apa(50.0);cit(50.0),39136.0,56611.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",7,dh-Trp,NA,trp+unk,"10,9,8,7,6,5,4,3,2*,1,0",7,Z-delta-Trp,, +BGC0001406,orf00021,A5,FALSE,D,tyr(70.0);bht(70.0);ile(55.0);leu(55.0);val(55.0),39136.0,56611.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",8,@L-X0,L,none,"10,9,8,7,6,5,4,3,2*,1,0",8,beta-Metyl-Trp,, +BGC0001406,orf00022,A1,FALSE,L,end(50.0);bmt(40.0);hse(40.0);met(30.0);b-ala(30.0),56610.0,63810.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",9,@L-3OH-Leu,L,leu+unk,"10,9,8,7,6,5,4,3,2*,1,0",9,erythro-Hyl,, +BGC0001406,orf00022,A2,FALSE,L,pro(80.0);pip(65.0);ala(55.0);ser(55.0);ala-d(45.0),56610.0,63810.0,+,BGC0001406,"#,0,1,2,3,4,5,6,7,8,9",10,@L-3OH-Pro,L,pro+unk,"10,9,8,7,6,5,4,3,2*,1,0",10,cis-3-Hyp,, +BGC0001414,orf00009,A1,TRUE,L,thr(85.0);allothr(80.0);ser(65.0);hpg(60.0);dht(60.0),8500.0,31885.0,-,BGC0001414,"#,0,1,2,3,4,5,6,7,8",-,-,-,-,"9,8,7,6,5,4,3,2*,1,0",0,Ac-Val,, +BGC0001414,orf00009,A2,FALSE,L,pro(90.0);pip(75.0);leu(50.0);ahp(50.0);ala(45.0),8500.0,31885.0,-,BGC0001414,"#,0,1,2,3,4,5,6,7,8",1,@L-4Me-Pro,L,pro+MT,"9,8,7,6,5,4,3,2*,1,0",1,Pro,, +BGC0001414,orf00009,A3,TRUE,L,thr(100.0);allothr(95.0);dht(90.0);ser(80.0);dhpg(60.0),8500.0,31885.0,-,BGC0001414,"#,0,1,2,3,4,5,6,7,8",2,@L-NMe-Thr,L,thr+MT,"9,8,7,6,5,4,3,2*,1,0",2,NMe-Thr,, +BGC0001414,orf00009,A4,FALSE,L,phe(75.0);tyr(75.0);bht(70.0);trp(65.0);leu(60.0),8500.0,31885.0,-,BGC0001414,"#,0,1,2,3,4,5,6,7,8",3,@L-Leu,L,leu,"9,8,7,6,5,4,3,2*,1,0",3,Leu,, +BGC0001414,orf00009,A5,FALSE,L,pro(90.0);pip(75.0);leu(50.0);ahp(50.0);ala(45.0),8500.0,31885.0,-,BGC0001414,"#,0,1,2,3,4,5,6,7,8",4,@L-4Me-Pro,L,pro+MT,"9,8,7,6,5,4,3,2*,1,0",4,MePro,, +BGC0001414,orf00009,A6,FALSE,L,phe(75.0);tyr(75.0);bht(70.0);trp(65.0);leu(60.0),8500.0,31885.0,-,BGC0001414,"#,0,1,2,3,4,5,6,7,8",5,@L-Leu,L,leu,"9,8,7,6,5,4,3,2*,1,0",5,Leu,, 
+BGC0001414,orf00018,A1,TRUE,L,thr(85.0);allothr(80.0);ser(65.0);hpg(60.0);dht(60.0),39708.0,52146.0,+,BGC0001414,"#,0,1,2,3,4,5,6,7,8",6,@L-NMe-Val,L,val+MT,"9,8,7,6,5,4,3,2*,1,0",6,NMe-Val,, +BGC0001414,orf00018,A2,FALSE,L,pro(100.0);pip(70.0);ala(55.0);ala-d(50.0);gly(50.0),39708.0,52146.0,+,BGC0001414,"#,0,1,2,3,4,5,6,7,8",7,@L-Pro,L,pro,"9,8,7,6,5,4,3,2*,1,0",7,Pro,, +BGC0001414,orf00018,A3,TRUE,D,phe(75.0);tyr(75.0);bht(70.0);trp(65.0);leu(60.0),39708.0,52146.0,+,BGC0001414,"#,0,1,2,3,4,5,6,7,8",8,@L-NMe-Leu,L,leu+MT,"9,8,7,6,5,4,3,2*,1,0",8,Leu,, +BGC0001414,orf00019,A1,FALSE,L,gly(95.0);ala(70.0);ile(65.0);val(65.0);leu(60.0),52142.0,56117.0,+,BGC0001414,"#,0,1,2,3,4,5,6,7,8",9,Gly,NA,gly,"9,8,7,6,5,4,3,2*,1,0",9,Gly,, +BGC0001448,orf00004,A1,FALSE,L,arg(50.0);gln(50.0);asp(45.0);glu(45.0);orn(45.0),2470.0,5602.0,+,BGC0001448,"#,0,1,2,3,4,5,6,7,8",0,@D-bMe-Asp,D,asp+MT,,,,, +BGC0001448,orf00014,A1,FALSE,L,asp(90.0);asn(85.0);gln(65.0);glu(65.0);orn(50.0),15318.0,24513.0,+,BGC0001448,"#,0,1,2,3,4,5,6,7,8",1,@L-L-Dbu/Dbu,L,none,,,,, +BGC0001448,orf00014,A2,FALSE,D,val(100.0);abu(90.0);ile(75.0);leu(70.0);ala(60.0),15318.0,24513.0,+,BGC0001448,"#,0,1,2,3,4,5,6,7,8",2,@D-Val,D,val,,,,, +BGC0001448,orf00015,A1,FALSE,L,glu(65.0);arg(55.0);asp(55.0);gln(55.0);lys(50.0),24509.0,41396.0,+,BGC0001448,"#,0,1,2,3,4,5,6,7,8",3,@L-Lys,L,lys,,,,, +BGC0001448,orf00015,A2,FALSE,L,asp(90.0);asn(85.0);gln(60.0);glu(60.0);hty(60.0),24509.0,41396.0,+,BGC0001448,"#,0,1,2,3,4,5,6,7,8",4,@L-OH-Asp,L,asp+unk,,,,, +BGC0001448,orf00015,A3,FALSE,L,asp(90.0);asn(85.0);gln(60.0);glu(60.0);hty(60.0),24509.0,41396.0,+,BGC0001448,"#,0,1,2,3,4,5,6,7,8",5,@L-Asp,L,asp,,,,, +BGC0001448,orf00015,A4,FALSE,L,gly(100.0);ala(80.0);leu(70.0);ile(65.0);val(65.0),24509.0,41396.0,+,BGC0001448,"#,0,1,2,3,4,5,6,7,8",6,Gly,NA,gly,,,,, +BGC0001448,orf00015,A5,FALSE,D,asp(90.0);asn(85.0);gln(65.0);glu(65.0);orn(50.0),24509.0,41396.0,+,BGC0001448,"#,0,1,2,3,4,5,6,7,8",7,@L-bMe-Asp,L,asp+MT,,,,, 
+BGC0001448,orf00016,A1,FALSE,L,val(100.0);ile(85.0);leu(80.0);abu(80.0);ala(65.0),41400.0,48243.0,+,BGC0001448,"#,0,1,2,3,4,5,6,7,8",8,@L-Val,L,val,,,,, +BGC0001448,orf00016,A2,FALSE,L,pro(80.0);pip(75.0);leu(50.0);met(50.0);ala(45.0),41400.0,48243.0,+,BGC0001448,"#,0,1,2,3,4,5,6,7,8",9,@L-4Me-Pro,L,pro+MT,,,,, +BGC0001469,-,-,,,,,,,BGC0001469,"0,1,2,3,4,5,6*",4,@D-Phe,D,phe,,,,, +BGC0001469,orf00004,A1,FALSE,D,ser(90.0);hse(60.0);ala(55.0);cys(55.0);leu(55.0),4131.0,21873.0,-,BGC0001469,"0,1,2,3,4,5,6*",6,@D-Ala,D,ala,"0,1,2,3,4,5,6*",0,D-Phe?,,Phe +BGC0001469,orf00004,A2,FALSE,L,cys(100.0);ser(60.0);ala(55.0);gly(55.0);thr(50.0),4131.0,21873.0,-,BGC0001469,"0,1,2,3,4,5,6*",0,dhCys,NA,cys+unk,"0,1,2,3,4,5,6*",1,L-heterocycl-Cys,Heterocyclization (cyclodehydration),Cys +BGC0001469,orf00004,A3,FALSE,L,cys(100.0);ser(60.0);ala(55.0);gly(55.0);thr(50.0),4131.0,21873.0,-,BGC0001469,"0,1,2,3,4,5,6*",1,dhCys,NA,cys+unk,"0,1,2,3,4,5,6*",2,L-heterocycl-Cys,Heterocyclization (cyclodehydration),Cys +BGC0001469,orf00004,A4,FALSE,L,cys(100.0);ser(60.0);ala(55.0);gly(55.0);thr(50.0),4131.0,21873.0,-,BGC0001469,"0,1,2,3,4,5,6*",2,dhCys,NA,cys+unk,"0,1,2,3,4,5,6*",3,L-heterocycl-Cys,Heterocyclization (cyclodehydration),Cys +BGC0001469,orf00004,A5,FALSE,L,cys(100.0);ser(60.0);ala(55.0);gly(55.0);thr(50.0),4131.0,21873.0,-,BGC0001469,"0,1,2,3,4,5,6*",3,@L-Cys,L,cys,"0,1,2,3,4,5,6*",4,L-heterocycl-Cys,Heterocyclization (cyclodehydration),Cys +BGC0001469,orf00005,A1,FALSE,L,ile(100.0);leu(80.0);val(75.0);ala(70.0);abu(70.0),21889.0,25180.0,-,BGC0001469,"0,1,2,3,4,5,6*",5,@L-Ile/aIle,L,ile,"0,1,2,3,4,5,6*",6,L-Ile,,Ile +BGC0001532,orf00001,A1,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),45.0,1980.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",1,dhAbu,NA,abu+unk,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",0,Bmt,, 
+BGC0001532,orf00002,A1,FALSE,L,phe(75.0);met(70.0);trp(70.0);tyr(70.0);bht(65.0),1982.0,9575.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",2,Leu,NA,leu,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",1,Val,, +BGC0001532,orf00002,A2,FALSE,D,leu(75.0);ile(70.0);hty(70.0);ala(65.0);ala-d(65.0),1982.0,9575.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",3,@D-Orn,D,orn,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",2,Lys,, +BGC0001532,orf00003,A1,FALSE,L,ile(100.0);leu(80.0);val(75.0);ala(70.0);abu(70.0),9621.0,23457.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",4,@L-Ile/aIle,L,ile,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",3,Ile,, +BGC0001532,orf00003,A2,FALSE,L,ile(100.0);leu(80.0);val(75.0);ala(70.0);abu(70.0),9621.0,23457.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",5,@L-Val,L,val,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",4,Val,, +BGC0001532,orf00003,A3,FALSE,L,ile(85.0);leu(80.0);val(75.0);ala(70.0);abu(70.0),9621.0,23457.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",6,@L-Val,L,val,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",5,Val,, +BGC0001532,orf00003,A4,FALSE,D,leu(75.0);hty(70.0);ala(65.0);ala-d(65.0);ile(65.0),9621.0,23457.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",7,Lys,NA,lys,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",6,Lys,, +BGC0001532,orf00004,A1,FALSE,L,ile(85.0);leu(80.0);val(75.0);ala(70.0);abu(70.0),23474.0,37274.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",8,@L-Val,L,val,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",7,Val,, +BGC0001532,orf00004,A2,FALSE,L,ile(85.0);leu(80.0);val(75.0);ala(70.0);abu(70.0),23474.0,37274.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",9,Leu,NA,leu,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",8,Leu,, +BGC0001532,orf00004,A3,,,leu(75.0);hty(70.0);ala(65.0);ala-d(65.0);ile(65.0),23474.0,37274.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",10,Lys,NA,lys,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",9,Lys,, +BGC0001532,orf00004,A4,,,bht(75.0);phe(70.0);trp(70.0);tyr(70.0);vol(60.0),23474.0,37274.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",11,@D-Tyr,D,tyr,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",10,Phe,, 
+BGC0001532,orf00005,A1,FALSE,L,leu(100.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),37358.0,44777.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",12,Leu,NA,leu,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",11,Leu,, +BGC0001532,orf00005,A2,FALSE,L,ile(85.0);leu(80.0);val(75.0);ala(70.0);abu(70.0),37358.0,44777.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",-,-,-,-,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",12,Val,, +BGC0001532,orf00009,A1,FALSE,L,tcl(50.0);met(30.0);sal(30.0);hyv(30.0);hty(30.0),48298.0,53542.0,+,BGC0001532,"#,0,1,2,3,4,5,6,7,8,9,10,11,12",13,Valol,NA,vol,"0,1,2,3,4,5,6,7,8,9,10,11,12,13",13,akV,, +BGC0001565,orf00008,A1,FALSE,L,ala-d(90.0);pro(75.0);ala(70.0);leu(70.0);val(65.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",2,@D-Ala,D,ala,"0,1,2,3,4,5,6,7,8,9,10*",0,Ala,, +BGC0001565,orf00008,A10,TRUE,L,leu(95.0);val(75.0);ala-d(70.0);ile(70.0);bmt(70.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",0,@L-NMe-Leu,L,leu+MT,"0,1,2,3,4,5,6,7,8,9,10*",9,MeLeu,, +BGC0001565,orf00008,A11,FALSE,L,ala(90.0);pro(75.0);val(70.0);gly(65.0);leu(65.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",1,@L-Ala,L,ala,"0,1,2,3,4,5,6,7,8,9,10*",10,Ala,, +BGC0001565,orf00008,A2,TRUE,L,leu(95.0);val(75.0);ala-d(70.0);ile(70.0);bmt(70.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",3,@L-NMe-Leu,L,leu+MT,"0,1,2,3,4,5,6,7,8,9,10*",1,MeLeu,, +BGC0001565,orf00008,A3,TRUE,L,leu(95.0);val(75.0);ala-d(70.0);ile(70.0);bmt(70.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",4,@L-NMe-Leu,L,leu+MT,"0,1,2,3,4,5,6,7,8,9,10*",2,MeLeu,, +BGC0001565,orf00008,A4,TRUE,L,val(95.0);leu(75.0);ala(70.0);abu(70.0);gly(65.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",5,@L-NMe-Val,L,val+MT,"0,1,2,3,4,5,6,7,8,9,10*",3,MeVal,, +BGC0001565,orf00008,A5,TRUE,L,bmt(100.0);phe(75.0);trp(70.0);tyr(70.0);bht(70.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",6,@L-NMe-Bmt,L,bmt+MT,"0,1,2,3,4,5,6,7,8,9,10*",4,MeBmt,, 
+BGC0001565,orf00008,A6,FALSE,L,abu(85.0);ile(70.0);leu(70.0);val(65.0);ala(60.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",7,@L-aThr/Thr,L,thr,"0,1,2,3,4,5,6,7,8,9,10*",5,Abu/Thr,, +BGC0001565,orf00008,A7,TRUE,L,gly(75.0);pro(65.0);val(50.0);ser(45.0);pip(45.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",8,NMe-Gly,NA,gly+MT,"0,1,2,3,4,5,6,7,8,9,10*",6,MeGly,, +BGC0001565,orf00008,A8,TRUE,L,leu(95.0);val(75.0);ala-d(70.0);ile(70.0);bmt(70.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",9,@L-NMe-Leu,L,leu+MT,"0,1,2,3,4,5,6,7,8,9,10*",7,MeLeu,, +BGC0001565,orf00008,A9,FALSE,L,val(95.0);leu(75.0);pro(75.0);ala(70.0);abu(70.0),9188.0,55022.0,-,BGC0001565,"0,1,2,3,4,5,6,7,8,9,10*",10,@L-Val,L,val,"0,1,2,3,4,5,6,7,8,9,10*",8,Val,, +BGC0001569,orf00012,A1,FALSE,L,ser(70.0);thr(70.0);allothr(65.0);dhpg(55.0);dhp(55.0),9609.0,17448.0,-,BGC0001569,"0,1,2,3,4,5*",2,@L-Orn,L,orn,,,,, +BGC0001569,orf00012,A2,FALSE,D,asn(100.0);asp(90.0);glu(65.0);gln(60.0);cha(60.0),9609.0,17448.0,-,BGC0001569,"0,1,2,3,4,5*",3,OH-Asn,NA,asn+unk,,,,, +BGC0001569,orf00013,A1,FALSE,L,ahp(70.0);hty(60.0);apa(60.0);ala(50.0);gly(50.0),17570.0,31769.0,-,BGC0001569,"0,1,2,3,4,5*",4,@L-Trp,L,trp,,,,, +BGC0001569,orf00013,A2,FALSE,D,val(100.0);ile(85.0);leu(80.0);abu(80.0);ala(65.0),17570.0,31769.0,-,BGC0001569,"0,1,2,3,4,5*",5,@D-Val,D,val,,,,, +BGC0001569,orf00013,A3,FALSE,D,leu(65.0);val(55.0);gly(50.0);ile(50.0);met(50.0),17570.0,31769.0,-,BGC0001569,"0,1,2,3,4,5*",0,@D-Leu,D,leu,,,,, +BGC0001569,orf00013,A4,FALSE,L,val(85.0);leu(80.0);abu(75.0);ile(70.0);cha(70.0),17570.0,31769.0,-,BGC0001569,"0,1,2,3,4,5*",1,@L-Ile/aIle,L,ile,,,,, +BGC0001582,orf00013,A1,TRUE,L,val(100.0);abu(90.0);ile(75.0);leu(70.0);ala(60.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",-,-,-,-,"12,11,10,9,8,7,6,5,4,3*,2,1,0",0,N-Me-Val,, 
+BGC0001582,orf00013,A10,TRUE,L,hty(60.0);met(50.0);dht(50.0);hse(50.0);apa(50.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",9,@L-X0,L,none,"12,11,10,9,8,7,6,5,4,3*,2,1,0",9,N-Me-4-OMe-L-Trp,, +BGC0001582,orf00013,A11,FALSE,L,val(100.0);abu(90.0);ile(75.0);leu(70.0);ala(60.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",10,@L-Val,L,val,"12,11,10,9,8,7,6,5,4,3*,2,1,0",10,Val,, +BGC0001582,orf00013,A12,FALSE,L,phe(100.0);tyr(85.0);bht(85.0);trp(80.0);bmt(60.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",11,@L-Ph-Ser,L,none,"12,11,10,9,8,7,6,5,4,3*,2,1,0",11,Ph-L-Ser,, +BGC0001582,orf00013,A13,FALSE,L,val(100.0);abu(90.0);ile(75.0);leu(70.0);ala(60.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",12,@L-Val,L,val,"12,11,10,9,8,7,6,5,4,3*,2,1,0",12,Val,, +BGC0001582,orf00013,A2,FALSE,L,val(100.0);abu(90.0);ile(75.0);leu(70.0);ala(60.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",1,@L-Val,L,val,"12,11,10,9,8,7,6,5,4,3*,2,1,0",1,Val,, +BGC0001582,orf00013,A3,TRUE,L,val(90.0);leu(80.0);abu(80.0);ile(75.0);ala(65.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",2,@L-NMe-aIle/NMe-Ile,L,ile+MT,"12,11,10,9,8,7,6,5,4,3*,2,1,0",2,allo-Ile,, +BGC0001582,orf00013,A4,FALSE,L,thr(100.0);allothr(90.0);ser(80.0);dht(80.0);hpg(60.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",3,@L-aThr/Thr,L,thr,"12,11,10,9,8,7,6,5,4,3*,2,1,0",3,NMe-Thr,, +BGC0001582,orf00013,A5,TRUE,L,thr(100.0);allothr(90.0);ser(80.0);dht(80.0);hpg(60.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",4,@L-NMe-Thr,L,thr+MT,"12,11,10,9,8,7,6,5,4,3*,2,1,0",4,Thr,, +BGC0001582,orf00013,A6,FALSE,L,val(100.0);abu(90.0);ile(75.0);leu(70.0);ala(60.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",5,@L-Val,L,val,"12,11,10,9,8,7,6,5,4,3*,2,1,0",5,Val,, 
+BGC0001582,orf00013,A7,TRUE,L,leu(75.0);gly(70.0);val(70.0);ala(65.0);ala-d(65.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",6,@L-NMe-Leu,L,leu+MT,"12,11,10,9,8,7,6,5,4,3*,2,1,0",6,Leu,, +BGC0001582,orf00013,A8,FALSE,L,val(100.0);abu(90.0);ile(75.0);leu(70.0);ala(60.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",7,@L-Val,L,val,"12,11,10,9,8,7,6,5,4,3*,2,1,0",7,Val,, +BGC0001582,orf00013,A9,TRUE,L,val(100.0);abu(90.0);ile(75.0);leu(70.0);ala(60.0),18666.0,64407.0,-,BGC0001582,"#,0,1,2,3,4,5,6,7,8,9,10,11",8,@L-NMe-Val,L,val+MT,"12,11,10,9,8,7,6,5,4,3*,2,1,0",8,N-Me-Val,, +BGC0001614,orf00005,A1,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),4810.0,15850.0,-,BGC0001614,"#,0,1,2,3,4,5,6,7,8",5,dhAbu,NA,abu+unk,"9,8,7,6,5,4,3,2*,1,0",8,N-Me-Thr,, +BGC0001614,orf00005,A2,FALSE,D,glu(80.0);gln(75.0);asn(65.0);asp(60.0);arg(55.0),4810.0,15850.0,-,BGC0001614,"#,0,1,2,3,4,5,6,7,8",6,Gln,NA,gln,"9,8,7,6,5,4,3,2*,1,0",7,Gly,, +BGC0001614,orf00005,A3,FALSE,L,gly(95.0);ala(70.0);ile(65.0);val(65.0);leu(60.0),4810.0,15850.0,-,BGC0001614,"#,0,1,2,3,4,5,6,7,8",7,Gly,NA,gly,"9,8,7,6,5,4,3,2*,1,0",6,Gln,, +BGC0001614,orf00006,A1,FALSE,D,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),15873.0,24963.0,-,BGC0001614,"#,0,1,2,3,4,5,6,7,8",3,aThr/Thr,NA,thr,"9,8,7,6,5,4,3,2*,1,0",5,Dhb,, +BGC0001614,orf00006,A2,FALSE,D,tyr(100.0);bht(90.0);phe(80.0);uda(70.0);trp(65.0),15873.0,24963.0,-,BGC0001614,"#,0,1,2,3,4,5,6,7,8",4,Tyr,NA,tyr,"9,8,7,6,5,4,3,2*,1,0",4,Tyr,, +BGC0001614,orf00013,A1,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),30540.0,36987.0,-,BGC0001614,"#,0,1,2,3,4,5,6,7,8",1,aThr/Thr,NA,thr,"9,8,7,6,5,4,3,2*,1,0",3,Thr,, +BGC0001614,orf00013,A2,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),30540.0,36987.0,-,BGC0001614,"#,0,1,2,3,4,5,6,7,8",2,aThr/Thr,NA,thr,"9,8,7,6,5,4,3,2*,1,0",2,Thr,, 
+BGC0001614,orf00015,A1,TRUE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),39401.0,47882.0,-,BGC0001614,"#,0,1,2,3,4,5,6,7,8",8,NMe-Thr,NA,thr+MT,"9,8,7,6,5,4,3,2*,1,0",1,Thr,, +BGC0001614,orf00015,A2,FALSE,L,gln(95.0);glu(75.0);asn(60.0);asp(60.0);arg(55.0),39401.0,47882.0,-,BGC0001614,"#,0,1,2,3,4,5,6,7,8",9,Gln,NA,gln,"9,8,7,6,5,4,3,2*,1,0",0,Dhh,, +BGC0001628,-,-,,,,,,,BGC0001628.1,"#,0,1,2,3,4,5,6",0,Pha,NA,phe-ac,,,,, +BGC0001628,orf00005,A1,FALSE,L,val(100.0);abu(90.0);ile(75.0);leu(70.0);ala(60.0),5222.0,12920.0,+,BGC0001628.1,"#,0,1,2,3,4,5,6",1,@L-Val,L,val,"0,1,2,3,4,5,6",0,Val,, +BGC0001628,orf00005,A2,FALSE,D,asn(90.0);asp(90.0);gln(65.0);glu(65.0);arg(50.0),5222.0,12920.0,+,BGC0001628.1,"#,0,1,2,3,4,5,6",2,@D-Asp,D,asp,"0,1,2,3,4,5,6",1,Asp,, +BGC0001628,orf00006,A1,FALSE,L,ala(95.0);gly(75.0);val(75.0);ala-d(70.0);ile(65.0),12931.0,20539.0,+,BGC0001628.1,"#,0,1,2,3,4,5,6",3,@L-Ala,L,ala,"0,1,2,3,4,5,6",2,Ala,, +BGC0001628,orf00006,A2,FALSE,D,phe(75.0);bht(75.0);trp(70.0);tyr(70.0);phg(60.0),12931.0,20539.0,+,BGC0001628.1,"#,0,1,2,3,4,5,6",4,@D-3OH-Leu,D,leu+unk,"0,1,2,3,4,5,6",3,Leu,, +BGC0001628,orf00007,A1,FALSE,L,ala(80.0);gly(70.0);ala-d(65.0);ile(65.0);leu(65.0),20599.0,32077.0,+,BGC0001628.1,"#,0,1,2,3,4,5,6",5,Gly,NA,gly,"0,1,2,3,4,5,6",4,Gly,, +BGC0001628,orf00007,A2,FALSE,D,ala(95.0);gly(75.0);val(75.0);ala-d(70.0);ile(65.0),20599.0,32077.0,+,BGC0001628.1,"#,0,1,2,3,4,5,6",6,@D-Ala,D,ala,"0,1,2,3,4,5,6",5,Ala,, +BGC0001628,orf00007,A3,FALSE,L,phe(80.0);trp(75.0);met(70.0);tyr(70.0);bht(70.0),20599.0,32077.0,+,BGC0001628.1,"#,0,1,2,3,4,5,6",7,@L-Phe,L,phe,"0,1,2,3,4,5,6",6,Phe,, +BGC0001657,orf00004,A1,FALSE,L,thr(100.0);allothr(95.0);dht(90.0);ser(80.0);dhpg(60.0),5994.0,33765.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",1,@L-aThr/Thr,L,thr,"0,1,2,3,4,5,6,7,8,9,10,11*",0,Thr,, 
+BGC0001657,orf00004,A2,FALSE,D,cit(70.0);arg(60.0);glu(60.0);lys(60.0);uda(60.0),5994.0,33765.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",2,@D-Arg,D,arg,"0,1,2,3,4,5,6,7,8,9,10,11*",1,Arg,, +BGC0001657,orf00004,A3,FALSE,L,ser(100.0);thr(65.0);hpg(65.0);allothr(60.0);dpg(60.0),5994.0,33765.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",3,@L-Ser,L,ser,"0,1,2,3,4,5,6,7,8,9,10,11*",2,Ser,, +BGC0001657,orf00004,A4,FALSE,L,gly(100.0);ala(75.0);leu(65.0);val(65.0);ile(60.0),5994.0,33765.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",4,Gly,NA,gly,"0,1,2,3,4,5,6,7,8,9,10,11*",3,Gly,, +BGC0001657,orf00004,A5,TRUE,D,phe(95.0);tyr(85.0);bht(80.0);trp(75.0);uda(70.0),5994.0,33765.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",5,@D-NMe-Phe,D,phe+MT,"0,1,2,3,4,5,6,7,8,9,10,11*",4,Phe,, +BGC0001657,orf00004,A6,FALSE,L,phe(75.0);bht(75.0);tyr(70.0);trp(65.0);phg(60.0),5994.0,33765.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",6,@L-Leu,L,leu,"0,1,2,3,4,5,6,7,8,9,10,11*",5,Leu,, +BGC0001657,orf00004,A7,FALSE,D,asp(80.0);asn(70.0);arg(60.0);glu(60.0);lys(60.0),5994.0,33765.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",7,@D-Arg,D,arg,"0,1,2,3,4,5,6,7,8,9,10,11*",6,Arg,, +BGC0001657,orf00005,A1,FALSE,L,glu(60.0);arg(55.0);asp(55.0);lys(55.0);orn(55.0),33745.0,53263.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",8,@L-Glu,L,glu,"0,1,2,3,4,5,6,7,8,9,10,11*",7,Glu,, +BGC0001657,orf00005,A2,FALSE,D,gln(95.0);glu(75.0);asn(60.0);asp(60.0);aad(55.0),33745.0,53263.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",9,@D-Gln,D,gln,"0,1,2,3,4,5,6,7,8,9,10,11*",8,Gln,, +BGC0001657,orf00005,A3,FALSE,D,leu(75.0);val(70.0);gly(65.0);ile(65.0);ala(60.0),33745.0,53263.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",10,@D-Trp,D,trp,"0,1,2,3,4,5,6,7,8,9,10,11*",9,Trp,, +BGC0001657,orf00005,A4,FALSE,L,val(100.0);ile(85.0);abu(80.0);leu(75.0);ala(60.0),33745.0,53263.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",11,@L-Ile/aIle,L,ile,"0,1,2,3,4,5,6,7,8,9,10,11*",10,Val/Ile,, 
+BGC0001657,orf00005,A5,FALSE,L,thr(100.0);allothr(95.0);dht(90.0);ser(80.0);dhpg(60.0),33745.0,53263.0,+,BGC0001657,"#,0,1,2,3,4,5,6,7,8,9,10,11",12,@L-aThr/Thr,L,thr,"0,1,2,3,4,5,6,7,8,9,10,11*",11,Thr,, +BGC0001796,orf00010,A1,FALSE,D,asp(65.0);glu(65.0);asn(60.0);gln(60.0);apa(60.0),7736.0,27020.0,-,BGC0001796,"#,0,1,2,3",0,@D-Tyr,D,tyr,"0,1,2,3,4",0,Phe,, +BGC0001796,orf00010,A2,FALSE,L,ser(100.0);thr(65.0);hpg(65.0);allothr(60.0);dpg(60.0),7736.0,27020.0,-,BGC0001796,"#,0,1,2,3",1,@L-Ser,L,ser,"0,1,2,3,4",1,Ser,, +BGC0001796,orf00010,A3,FALSE,D,leu(60.0);met(60.0);bmt(60.0);ala(55.0);ser(55.0),7736.0,27020.0,-,BGC0001796,"#,0,1,2,3",2,@D-Leu,D,leu,"0,1,2,3,4",2,Leu,, +BGC0001796,orf00010,A4,FALSE,L,gln(75.0);glu(70.0);asn(60.0);asp(60.0);aad(55.0),7736.0,27020.0,-,BGC0001796,"#,0,1,2,3",3,@L-Trp,L,trp,"0,1,2,3,4",3,Phe,, +BGC0001796,orf00010,A5,FALSE,L,arg(50.0);glu(50.0);orn(50.0);dab(50.0);hty(50.0),7736.0,27020.0,-,BGC0001796,"#,0,1,2,3",4,@L-Arg,L,arg,"0,1,2,3,4",4,Phe,, +BGC0001822,orf00002,A1,FALSE,L,ser(95.0);hpg(70.0);dpg(65.0);thr(60.0);cys(55.0),1362.0,16062.0,+,BGC0001822,"#,0,1,2,3,4,5,6,7",1,Ala,NA,ala,"0,1,2,3,4,5,6,7,8,9*",2,Ala,thr+dehydration, +BGC0001822,orf00002,A2,FALSE,D,leu(95.0);ile(75.0);val(75.0);ala-d(70.0);abu(70.0),1362.0,16062.0,+,BGC0001822,"#,0,1,2,3,4,5,6,7",2,Leu,NA,leu,"0,1,2,3,4,5,6,7,8,9*",3,Leu,none, +BGC0001822,orf00003,A1,FALSE,L,ala(50.0);gly(50.0);met(50.0);pro(50.0);ser(50.0),16073.0,22562.0,+,BGC0001822,"#,0,1,2,3,4,5,6,7",3,X0,NA,none,"0,1,2,3,4,5,6,7,8,9*",4,Pro*,ile, +BGC0001822,orf00003,A2,FALSE,L,cys(100.0);ser(60.0);ala(55.0);gly(55.0);thr(50.0),16073.0,22562.0,+,BGC0001822,"#,0,1,2,3,4,5,6,7",4,Cys,NA,cys,"0,1,2,3,4,5,6,7,8,9*",5,???,b_ala, +BGC0001822,orf00004,A1,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),22558.0,32521.0,+,BGC0001822,"#,0,1,2,3,4,5,6,7",5,dhAbu,NA,abu+unk,"0,1,2,3,4,5,6,7,8,9*",6,dhAbu,none, 
+BGC0001822,orf00004,A2,FALSE,L,pro(85.0);pip(85.0);ala(50.0);ala-d(50.0);cys(50.0),22558.0,32521.0,+,BGC0001822,"#,0,1,2,3,4,5,6,7",6,Hpr,NA,pip,"0,1,2,3,4,5,6,7,8,9*",7,Hpr,ile, +BGC0001822,orf00004,A3,FALSE,L,thr(100.0);allothr(100.0);dht(100.0);ser(80.0);dhpg(60.0),22558.0,32521.0,+,BGC0001822,"#,0,1,2,3,4,5,6,7",7,aThr/Thr,NA,thr,"0,1,2,3,4,5,6,7,8,9*",8,Thr,thr, +BGC0001822,orf00005,A1,TRUE,L,val(100.0);ile(85.0);leu(80.0);abu(80.0);ala(65.0),32517.0,37905.0,+,BGC0001822,"#,0,1,2,3,4,5,6,7",8,NMe-Val,NA,val+MT,"0,1,2,3,4,5,6,7,8,9*",9,NMe-Val,pip, \ No newline at end of file diff --git a/nerpa.py b/nerpa.py index 83d1f17..5e3e34a 100755 --- a/nerpa.py +++ b/nerpa.py @@ -18,6 +18,7 @@ import nerpa_utils import handle_rban import logger +import src.markov_probability_model.main # for detecting and processing antiSMASH v.5 output site.addsitedir(os.path.join(nerpa_init.python_modules_dir, 'NRPSPredictor_utils')) @@ -59,6 +60,23 @@ def parse_args(log): advanced_input_group.add_argument("--force-existing-outdir", dest="output_dir_reuse", action="store_true", default=False, help="don't crash if the output dir already exists") + alternative_model_group = parser.add_argument_group( + 'Alternative model parameters', + 'Additionally use Hidden Markov Model for calculating probabilities and compare results') + alternative_model_group.add_argument("--use_alternative_model", type=bool, default=False, + help="use additional model or not") + alternative_model_group.add_argument("--algo", nargs='+', + help="list of algorithms to use for alignment", + default=['viterbi', 'global_viterbi', 'maximum_accuracy', 'maximum_posterior_decoding']) + alternative_model_group.add_argument("--use_bw", type=bool, default=False, + help="use Baum-Welch for parameters estimation or not") + alternative_model_group.add_argument("--bw_iters", type=int, default=10, + help="number of Baum-Welch iterations") + alternative_model_group.add_argument("--log_alignments", type=bool, default=False, + 
help="pretty log alignments with marginal probabilities or not") + alternative_model_group.add_argument("--topk", type=list, default=[1, 3, 5, 10], + help="k value for top-k-matching in computing results") + # parser.add_argument("--insertion", help="insertion score [default=-2.8]", default=-2.8, action="store") # parser.add_argument("--deletion", help="deletion score [default=-5]", default=-5, action="store") parser.add_argument('--rban-monomers-db', dest='rban_monomers', type=str, default=None, @@ -321,6 +339,13 @@ def run(args, log): "--threads", str(args.threads)] log.info("\n======= Nerpa matching") nerpa_utils.sys_call(command, log, cwd=output_dir) + if args.use_alternative_model: + src.markov_probability_model.main.run( + data_dir=output_dir, prob_gen_filepath=os.path.join(nerpa_init.configs_dir, 'prob_gen.cfg'), + results_dir=os.path.join(output_dir, 'markov_probability_model_results'), + mibig_path=os.path.join(nerpa_init.nerpa_root_dir, 'data', 'mibig.csv'), + pool_sz=args.threads, algo=args.algo, use_bw=args.use_bw, bw_iters=args.bw_iters, + log_alignments=args.log_alignments, topk=args.topk) log.info("RESULTS:") log.info("Main report is saved to " + os.path.join(output_dir, 'report.csv'), indent=1) log.info("Detailed reports are saved to " + output_dir, indent=1) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8f0fb29 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +pandas +typing +numpy +prettytable +tqdm +matplotlib diff --git a/src/markov_probability_model/base/alphabet.py b/src/markov_probability_model/base/alphabet.py new file mode 100644 index 0000000..108ad8f --- /dev/null +++ b/src/markov_probability_model/base/alphabet.py @@ -0,0 +1,57 @@ +import abc +from typing import List, NewType, TypeVar, Optional + + +class Symbol(abc.ABC): + def __init__(self, name: str, modification: Optional[str], methylation: bool): + self.name = name + self.modification = modification + self.methylation = methylation + + def 
__str__(self) -> str: + res = '' + if self.modification is not None: + res += self.modification + '-' + if self.methylation: + res += 'NMe-' + res += self.name + return res + + def __eq__(self, other): + return str(self) == str(other) + + def __hash__(self): + return hash(str(self)) + + def __repr__(self): + return str(self) + + +class Gap(Symbol): + def __init__(self): + super().__init__('-', None, False) + + +class Aminoacid(Symbol): + pass + + +class ScoredAminoacid(Symbol): + def __init__(self, init_name: str, modification: Optional[str], methylation: bool): + super().__init__(init_name.split('(')[0], modification, methylation) + self._score_str: str = init_name.split('(')[1].split(')')[0] + self.score: float = float(self._score_str) + + def __str__(self): + return super().__str__() + '(' + self._score_str + ')' + + +AlignedAminoacid = TypeVar('AlignedAminoacid', Aminoacid, Gap) + +AlignedScoredAminoacid = TypeVar('AlignedScoredAminoacid', ScoredAminoacid, Gap) + +Alphabet = NewType('Alphabet', List[Symbol]) + +AminoacidAlphabet = NewType('AminoacidAlphabet', List[Aminoacid]) + +ScoredAminoacidAlphabet = NewType('ScoredAminoacidAlphabet', List[ScoredAminoacid]) diff --git a/src/markov_probability_model/base/base_sequence_id_resolver.py b/src/markov_probability_model/base/base_sequence_id_resolver.py new file mode 100644 index 0000000..cd6a88a --- /dev/null +++ b/src/markov_probability_model/base/base_sequence_id_resolver.py @@ -0,0 +1,35 @@ +import abc +import re +from typing import NewType, List + +SequenceId = NewType('SequenceId', str) +BaseSequenceId = NewType('BaseSequenceId', str) + + +class BaseSequenceIdResolver(abc.ABC): + @abc.abstractmethod + def resolve(self, sequence_id: SequenceId) -> SequenceId: + pass + + +class SimpleBaseSequenceIdResolver(BaseSequenceIdResolver): + def __init__(self): + self._regexps: List[str] = [ + r'BGC[0-9]{7}', + r'NPA[0-9]{6}', + r'[A-Z]{3}[0-9]{5}_variant', + r'[A-Z]{3}[0-9]{2}-[A-Z]{1}_variant', + 
r'antimarin[0-9]{4}_[0-9]{4,5}_variant', + r'streptomedb.[0-9]{2,4}_variant', + r'mibig_[0-9]{3}_variant', + ] + + def resolve(self, sequence_id: SequenceId) -> SequenceId: + for seq_re in self._regexps: + matches = re.findall(seq_re, str(sequence_id)) + if len(matches) > 0: + res = matches[0] + if res.endswith('_variant'): + return res[:-len('_variant')] + return res + raise IndexError(f'Cannot resolve base for {sequence_id}') diff --git a/src/markov_probability_model/base/sequence.py b/src/markov_probability_model/base/sequence.py new file mode 100644 index 0000000..b973a9b --- /dev/null +++ b/src/markov_probability_model/base/sequence.py @@ -0,0 +1,38 @@ +from src.markov_probability_model.base.base_sequence_id_resolver import SequenceId, BaseSequenceIdResolver, \ + SimpleBaseSequenceIdResolver +from src.markov_probability_model.base.alphabet import Aminoacid, ScoredAminoacid, AlignedAminoacid, \ + AlignedScoredAminoacid +from typing import List, Optional, Generic, TypeVar + +S = TypeVar('S') + + +class Sequence(Generic[S]): + def __init__(self, sequence_id: SequenceId, symbols: List[S], + base_seq_id_resolver: Optional[BaseSequenceIdResolver] = SimpleBaseSequenceIdResolver()): + self.sequence_id = sequence_id + self.symbols = symbols + self._base_seq_id_resolver = base_seq_id_resolver + + @property + def base_sequence_id(self): + return self._base_seq_id_resolver.resolve(self.sequence_id) + + def __len__(self): + return len(self.symbols) + + +class AminoacidSequence(Sequence[Aminoacid]): + pass + + +class ScoredAminoacidSequence(Sequence[ScoredAminoacid]): + pass + + +class AlignedAminoacidSequence(Sequence[AlignedAminoacid]): + pass + + +class AlignedScoredAminoacidSequence(Sequence[AlignedScoredAminoacid]): + pass diff --git a/src/markov_probability_model/base/utils.py b/src/markov_probability_model/base/utils.py new file mode 100644 index 0000000..a256158 --- /dev/null +++ b/src/markov_probability_model/base/utils.py @@ -0,0 +1,31 @@ +import numpy as np + 
+from typing import List + + +def my_log(a): + if a == 0: + return -np.inf + return np.log(a) + + +def my_exp(a): + if a == -np.inf: + return 0 + res = np.exp(a) + if isinstance(res, np.ndarray): + return res[0] + return res + + +def log_add_exp(l: List[float]): + l = list(filter(lambda x: x != -np.inf, l)) + if len(l) == 0: + return -np.inf + if len(l) == 1: + return l[0] + l = sorted(l) + res = np.logaddexp(l[0], l[1]) + for i in l[2:]: + res = np.logaddexp(res, i) + return res \ No newline at end of file diff --git a/src/markov_probability_model/data_loader/alignments_loader.py b/src/markov_probability_model/data_loader/alignments_loader.py new file mode 100644 index 0000000..bccb280 --- /dev/null +++ b/src/markov_probability_model/data_loader/alignments_loader.py @@ -0,0 +1,30 @@ +import abc + +from typing import List, TypeVar +from src.markov_probability_model.pairwise_alignment.sequence_aligner import PairwiseAlignmentOutputWithLogs +from src.markov_probability_model.data_loader.data_loader import PairwiseAlignmentDataLoader, TwoSequenceListsData +from src.markov_probability_model.base.sequence import AminoacidSequence, ScoredAminoacidSequence +from src.markov_probability_model.base.alphabet import Gap, Aminoacid, ScoredAminoacid + + +class AlignmentsLoader(PairwiseAlignmentDataLoader): + @abc.abstractmethod + def load_alignments(self) -> List[PairwiseAlignmentOutputWithLogs]: + pass + + def load_data(self) -> TwoSequenceListsData: + alignments = self.load_alignments() + seqs1: List[AminoacidSequence] = [] + seqs2: List[ScoredAminoacidSequence] = [] + for alignment in alignments: + s1: List[Aminoacid] = [] + for s in alignment.aligned_sequence1.symbols: + if s != Gap(): + s1.append(s) + seqs1.append(AminoacidSequence(alignment.aligned_sequence1.sequence_id, s1)) + s2: List[ScoredAminoacid] = [] + for s in alignment.aligned_sequence2.symbols: + if s != Gap(): + s2.append(s) + seqs2.append(ScoredAminoacidSequence(alignment.aligned_sequence2.sequence_id, s2)) + 
return TwoSequenceListsData(seqs1, seqs2) diff --git a/src/markov_probability_model/data_loader/data_loader.py b/src/markov_probability_model/data_loader/data_loader.py new file mode 100644 index 0000000..8d23d70 --- /dev/null +++ b/src/markov_probability_model/data_loader/data_loader.py @@ -0,0 +1,22 @@ +import abc + +from src.markov_probability_model.base.sequence import AminoacidSequence, ScoredAminoacidSequence +from typing import List, Generic, TypeVar + +D = TypeVar('D') + + +class DataLoader(abc.ABC, Generic[D]): + @abc.abstractmethod + def load_data(self) -> D: + pass + + +class TwoSequenceListsData: + def __init__(self, sequences1: List[AminoacidSequence], sequences2: List[ScoredAminoacidSequence]): + self.sequences1 = sequences1 + self.sequences2 = sequences2 + + +class PairwiseAlignmentDataLoader(DataLoader[TwoSequenceListsData], abc.ABC): + pass diff --git a/src/markov_probability_model/data_loader/fdr_loader.py b/src/markov_probability_model/data_loader/fdr_loader.py new file mode 100644 index 0000000..a7e5e8d --- /dev/null +++ b/src/markov_probability_model/data_loader/fdr_loader.py @@ -0,0 +1,69 @@ +import abc +import pandas as pd +import os +import copy + +from src.markov_probability_model.pairwise_alignment.fdr import FdrData, FdrParameters, FdrGenerator +from src.markov_probability_model.pairwise_alignment.sequence_aligner import ScoredPairwiseAlignmentOutput +from src.markov_probability_model.base.sequence import AlignedAminoacidSequence, AlignedScoredAminoacidSequence +from src.markov_probability_model.base.base_sequence_id_resolver import SequenceId +from typing import List, Dict + + +class FdrLoader(abc.ABC): + @abc.abstractmethod + def load_fdr(self) -> List[Dict[str, FdrData]]: + pass + + +class FdrGeneratorFromReport(FdrLoader): + def __init__(self, data_dir: str, fdr_parameters: List[FdrParameters]): + self._data_dir = data_dir + self._fdr_parameters = copy.deepcopy(fdr_parameters) + for f in self._fdr_parameters: + f.pairs_df_logpath = 
None + f.best_pairs_df_logpath = None + + def load_fdr(self) -> List[Dict[str, FdrData]]: + report = pd.read_csv(os.path.join(self._data_dir, 'report.csv')) + alignments: List[ScoredPairwiseAlignmentOutput] = [ + self.NerpaFdrAlignment(score, seq1, seq2.split('/')[-1]) + for score, seq1, seq2 in zip(report['score'], report['mol id'], report['prediction id']) + ] + fdrs = FdrGenerator(alignments, self._fdr_parameters).generate_fdr() + return [{'NERPA': f} for f in fdrs] + + class NerpaFdrAlignment(ScoredPairwiseAlignmentOutput): + def __init__(self, score: float, sequence_id1: SequenceId, sequence_id2: SequenceId): + super().__init__(AlignedAminoacidSequence(sequence_id1, []), + AlignedScoredAminoacidSequence(sequence_id2, [])) + self._score = score + + def score(self) -> float: + return self._score + + +class CsvFdrLoader(FdrLoader): + def __init__(self, data_dir: str, fdr_parameters: List[FdrParameters]): + self._data_dir = data_dir + self._fdr_parameters = fdr_parameters + + def load_fdr(self) -> List[Dict[str, FdrData]]: + return [self._load_single_fdr(p) for p in self._fdr_parameters] + + def _load_single_fdr(self, p: FdrParameters) -> Dict[str, FdrData]: + t = pd.read_csv(os.path.join( + self._data_dir, f'FDR_top{p.topk}_{p.relative_to}_scores_with_garlic.csv')) + return {'NERPA': FdrData(_fdr_row_to_array(t['FDR Nerpa'])), + 'GARLIC': FdrData(_fdr_row_to_array(t['FDR Garlic']))} + + +def _fdr_row_to_array(row, max_len=500) -> List[float]: + a = [] + for c in row: + if c == '-': + break + a.append(float(c)) + if len(a) > max_len: + a = a[:max_len] + return list(a) diff --git a/src/markov_probability_model/data_loader/mibig_alignments_loader.py b/src/markov_probability_model/data_loader/mibig_alignments_loader.py new file mode 100644 index 0000000..f38f8be --- /dev/null +++ b/src/markov_probability_model/data_loader/mibig_alignments_loader.py @@ -0,0 +1,65 @@ +import pandas + +from src.markov_probability_model.data_loader.alignments_loader import 
AlignmentsLoader, PairwiseAlignmentOutputWithLogs +from src.markov_probability_model.base.alphabet import Aminoacid, ScoredAminoacid, Gap +from src.markov_probability_model.base.sequence import AlignedAminoacidSequence, AlignedScoredAminoacidSequence +from typing import List, Tuple, Optional + + +class MibigAlignmentsLoader(AlignmentsLoader): + def __init__(self, mibig_filepath: str): + self._mibig_filepath = mibig_filepath + + def load_alignments(self) -> List[PairwiseAlignmentOutputWithLogs]: + mibig = pandas.read_csv(self._mibig_filepath) + outp: List[PairwiseAlignmentOutputWithLogs] = [] + for bgc, alignment_data in mibig.groupby(mibig['BGC']): + sequence, prediction = [], [] + for seq_symbol, base_seq_symbol, pred_symbols, e_domain, m_domain in zip(alignment_data['rBan AA-ID'], + alignment_data['rBan AA'], + alignment_data['PRED_TOP5'], + alignment_data['L-/D- (E domain)'], + alignment_data['M domain']): + if pandas.isna(seq_symbol) or seq_symbol == '-': + s1 = '-' + sequence.append(Gap()) + else: + s1, modification1, methylation1 = _simplify_seq_symbol(seq_symbol, base_seq_symbol) + sequence.append(Aminoacid(s1, modification1, methylation1)) + + if pandas.isna(pred_symbols) or pred_symbols == '-': + prediction.append(Gap()) + else: + s2 = _choose_pred_symbol(pred_symbols, s1) + modification2: str = None if pandas.isna(e_domain) or e_domain == '-' else '@' + e_domain + methylation2: bool = False if pandas.isna(m_domain) or m_domain == '-' else ( + m_domain.lower() == 'true') + prediction.append(ScoredAminoacid(s2, modification2, methylation2)) + + outp.append(PairwiseAlignmentOutputWithLogs( + AlignedAminoacidSequence(bgc, sequence), AlignedScoredAminoacidSequence(bgc, prediction), logs='')) + return outp + + +def _simplify_base_seq_symbol(s: str) -> str: + return s.split('+')[0] + + +def _simplify_seq_symbol(s: str, base_s: str) -> Tuple[str, Optional[str], bool]: + modification = None + if '@L' in s: + modification = '@L' + elif '@D' in s: + modification = 
'@D' + return _simplify_base_seq_symbol(base_s), modification, ('NMe' in s) + + +def _choose_pred_symbol(pred_symbols, seq_symbol) -> str: + if pandas.isna(pred_symbols): + return '-' + pred_symbols = pred_symbols.split(';') + base_symbols, scores = zip(*[(s.split('(')[0], float(s.split('(')[1].split(')')[0])) for s in pred_symbols]) + max_score_base_symbols = [base_symbols[i] for i in range(len(base_symbols)) if scores[i] == scores[0]] + if seq_symbol in max_score_base_symbols: + return pred_symbols[max_score_base_symbols.index(seq_symbol)] + return pred_symbols[0] diff --git a/src/markov_probability_model/data_loader/pickle_data_loader.py b/src/markov_probability_model/data_loader/pickle_data_loader.py new file mode 100644 index 0000000..f9d2ee9 --- /dev/null +++ b/src/markov_probability_model/data_loader/pickle_data_loader.py @@ -0,0 +1,33 @@ +import pickle +import os + +from src.markov_probability_model.data_loader.data_loader import PairwiseAlignmentDataLoader, TwoSequenceListsData +from src.markov_probability_model.base.sequence import AminoacidSequence, ScoredAminoacidSequence, SequenceId +from src.markov_probability_model.base.alphabet import Aminoacid, ScoredAminoacid +from typing import List + + +class PickleDataLoader(PairwiseAlignmentDataLoader): + def __init__(self, data_dir: str): + self._data_dir = data_dir + + def load_data(self) -> TwoSequenceListsData: + structures = self._load_sequences1() + predictions = self._load_sequences2() + return TwoSequenceListsData(structures, predictions) + + def _load_sequences1(self) -> List[AminoacidSequence]: + structs = pickle.load(open(os.path.join(self._data_dir, 'structures.pickle'), 'rb')) + structures: List[AminoacidSequence] = [] + for bgc, struct in structs.items(): + structures.append(AminoacidSequence(bgc, [Aminoacid(s.lower(), None, False) for s in struct])) + return structures + + def _load_sequences2(self) -> List[ScoredAminoacidSequence]: + pred = pickle.load(open(os.path.join(self._data_dir, 
'pred_score.pickle'), 'rb')) + predictions: List[ScoredAminoacidSequence] = [] + for bgc, prs in pred.items(): + for i, pr in enumerate(prs): + predictions.append(ScoredAminoacidSequence( + SequenceId(f'{bgc}_{i}'), [ScoredAminoacid(s.lower(), None, False) for s in pr])) + return predictions diff --git a/src/markov_probability_model/data_loader/raw_data_parser.py b/src/markov_probability_model/data_loader/raw_data_parser.py new file mode 100644 index 0000000..e283516 --- /dev/null +++ b/src/markov_probability_model/data_loader/raw_data_parser.py @@ -0,0 +1,85 @@ +import os +import re +import sys + +from src.markov_probability_model.data_loader.data_loader import PairwiseAlignmentDataLoader, TwoSequenceListsData +from src.markov_probability_model.base.sequence import AminoacidSequence, ScoredAminoacidSequence, SequenceId +from src.markov_probability_model.base.alphabet import Aminoacid, ScoredAminoacid, AminoacidAlphabet +from typing import List, Tuple, Optional + + +class RawDataParser(PairwiseAlignmentDataLoader): + def __init__(self, data_dir: str, old_omega_a: AminoacidAlphabet): + self._data_dir = data_dir + self._old_omega_a = old_omega_a + + def load_data(self) -> TwoSequenceListsData: + structures = self._load_sequences1() + predictions = self._load_sequences2() + return TwoSequenceListsData(structures, predictions) + + def _load_sequences1(self) -> List[AminoacidSequence]: + structures: List[AminoacidSequence] = [] + with open(os.path.join(self._data_dir, 'structures.info'), 'r') as f: + lines = [l.rstrip() for l in f.readlines() if len(l) > 0] + for l in lines: + bgc_name = SequenceId(l.split()[0]) + bgc_symbols = l.split()[1].split(';')[0].split(',') + bgc_symbols = list(map(lambda x: _parse_symbol(x, self._old_omega_a), bgc_symbols)) + structures.append( + AminoacidSequence(bgc_name, + [Aminoacid(s, modification, methylation) for s, modification, methylation in + bgc_symbols])) + return structures + + def _load_sequences2(self) -> 
List[ScoredAminoacidSequence]: + pred: List[ScoredAminoacidSequence] = [] + path = os.path.join(self._data_dir, 'predictions') + for subdir, _, files in os.walk(path): + if subdir != path: + continue + for file in files: + pred_name = SequenceId(file) + with open(os.path.join(subdir, file), 'r') as f: + lines = f.readlines() + pred_symbols = [] + for l in lines: + l_split = l.split() + base_name = l_split[2].split(';')[0] + modification = '@D' if 'd-' in l_split[1].lower() else '@L' + methylation = ('+mt' in l_split[1].lower()) + pred_symbols.append(ScoredAminoacid(base_name, modification, methylation)) + pred.append(ScoredAminoacidSequence(pred_name, pred_symbols)) + return pred + + +def _parse_base_symbol(y: str, old_omega_a: AminoacidAlphabet) -> str: + x = y.lower() + for p in sorted(re.findall(r'[a-z]+', x), key=lambda a: -len(a)): + if p in [a.name for a in old_omega_a] and p not in ['dhpg']: + return p + if x[-2:] in ['x0', 'x1', 'x2', 'x3', 'x4']: + return 'none' + if x[-4:] in ['mabu', 'adda', 'dhpg']: + return x.lower()[-4:-1] + if x[-4:] in ['bala', 'bphe', 'corn'] or x[-5:] in ['dhabu', 'bhend', 'dhcys']: + return x.lower()[-3:] + if x[-5:] in ['valol']: + return 'val' + res = sorted(re.findall(r'[a-z]+', x), key=lambda a: -len(a))[0] + if res in ['put', 'end', 'hva', 'dbu', 'piz', 'hmp', 'hpr', 'dov', 'pha', 'suc', 'eta', 'hyv', 'dpb', 'ahp', + 'uda', 'cha', 'bht', 'dhb', 'cit', 'hty', 'tcl', 'met']: + return res + sys.stderr.write(f'No alphabet symbol for {x}, returning none\n') + return 'none' + + +def _parse_symbol(y: str, old_omega_a: AminoacidAlphabet) -> Tuple[str, Optional[str], bool]: + name = _parse_base_symbol(y, old_omega_a) + modification: Optional[str] = None + if '@L' in y: + modification = '@L' + elif '@D' in y: + modification = '@D' + methylation: bool = ('NMe' in y) or len(re.findall(r'[0-9]Me', y)) > 0 + return name, modification, methylation diff --git a/src/markov_probability_model/hmm/hmm.py 
S = TypeVar('S')
O = TypeVar('O')

Prob = NewType('Prob', float)


class HMM(Generic[S, O]):
    """Hidden Markov model backed by three probability lookup tables.

    ``S`` is the state type and ``O`` the observation type. The state order
    is the key order of ``start_probs``.
    """

    def __init__(self, start_probs: Dict[S, Prob],
                 transition_probs: Dict[S, Dict[S, Prob]],
                 observation_probs: Dict[S, Dict[O, Prob]]):
        # The tables are stored as given (no copies are made).
        self._observation_probs = observation_probs
        self._transition_probs = transition_probs
        self._start_probs = start_probs

    @property
    def states(self) -> List[S]:
        """Return the states in ``start_probs`` key order, as a new list."""
        return [state for state in self._start_probs]

    def observations(self, s: S) -> List[O]:
        """Return the observations that have an entry for state ``s``."""
        emissions = self._observation_probs[s]
        return [observation for observation in emissions]

    def start_prob(self, s: S) -> Prob:
        """Return the start probability stored for ``s``."""
        return self._start_probs[s]

    def transition_prob(self, frm: S, to: S) -> Prob:
        """Return the probability stored for the transition ``frm`` -> ``to``."""
        outgoing = self._transition_probs[frm]
        return outgoing[to]

    def observation_prob(self, s: S, o: O) -> Prob:
        """Return the probability stored for observing ``o`` in state ``s``."""
        emissions = self._observation_probs[s]
        return emissions[o]

    def state_index(self, s: S) -> int:
        """Return the position of ``s`` within ``states``."""
        ordered_states = self.states
        return ordered_states.index(s)
class PairwiseAlignmentHmmObservation:
    """A pair of symbols emitted together by one HMM step.

    Hashing and equality are both derived from the string form
    ``'<first>-<second>'``.
    """

    def __init__(self, first: Symbol, second: Symbol):
        self.second = second
        self.first = first

    def __str__(self):
        return '-'.join((str(self.first), str(self.second)))

    def __repr__(self):
        return str(self)

    def __hash__(self):
        text = str(self)
        return hash(text)

    def __eq__(self, other):
        # Equality is textual: anything with the same str() compares equal.
        own_text = str(self)
        other_text = str(other)
        return own_text == other_text
Dict[PairwiseAlignmentHmmState, Prob] = dict(zip(states, list(params.mu))) + transition_probs: Dict[PairwiseAlignmentHmmState, Dict[PairwiseAlignmentHmmState, Prob]] = {} + for i, frm in enumerate(states): + transition_probs[frm] = {} + for j, to in enumerate(states): + transition_probs[frm][to] = params.tau[i][j] + observation_probs: Dict[PairwiseAlignmentHmmState, Dict[PairwiseAlignmentHmmObservation, Prob]] = {} + for st in states: + observation_probs[st] = {} + for a in params.omega_a: + observation_probs[self.A][PairwiseAlignmentHmmObservation(a, Gap())] = params.q_a[a] + for b in params.omega_b: + observation_probs[self.M][PairwiseAlignmentHmmObservation(a, b)] = params.p[a][b] + observation_probs[self.B][PairwiseAlignmentHmmObservation(Gap(), b)] = params.q_b[b] + super().__init__(start_probs, transition_probs, observation_probs) + + @property + def parameters(self): + return self._params diff --git a/src/markov_probability_model/main.py b/src/markov_probability_model/main.py new file mode 100644 index 0000000..259e38e --- /dev/null +++ b/src/markov_probability_model/main.py @@ -0,0 +1,119 @@ +import os +import argparse + +from src.markov_probability_model.data_loader.mibig_alignments_loader import MibigAlignmentsLoader, \ + PairwiseAlignmentOutputWithLogs +from src.markov_probability_model.data_loader.data_loader import TwoSequenceListsData +from src.markov_probability_model.data_loader.raw_data_parser import RawDataParser +from src.markov_probability_model.data_loader.fdr_loader import FdrGeneratorFromReport +from src.markov_probability_model.parameters.ml_parameters_estimator import MaxLikelihoodParametersEstimator +from src.markov_probability_model.parameters.baum_welch_parameters_estimator import BaumWelchParametersEstimator +from src.markov_probability_model.parameters.utils import get_alphabets_from_alignments +from src.markov_probability_model.hmm.pairwise_alignment_hmm import PairwiseAlignmentHmm, PairwiseAlignmentHMMParameters +from 
src.markov_probability_model.pairwise_alignment.algo.viterbi import Viterbi, ViterbiOutput +from src.markov_probability_model.pairwise_alignment.algo.maximum_posterior_decoding import \ + MaximumPosteriorDecodingOutput, MaximumPosteriorDecoding +from src.markov_probability_model.pairwise_alignment.algo.maximum_accuracy import MaximumAccuracyOutput, MaximumAccuracy +from src.markov_probability_model.pairwise_alignment.algo.global_viterbi import GlobalViterbi, GlobalViterbiOutput +from src.markov_probability_model.pairwise_alignment.score_augmentations import NullHypothesisScoreAugmentator, \ + IdentityScoreAugmentator +from src.markov_probability_model.pairwise_alignment.alignment_generator import AllPairsAlignmentGenerator +from src.markov_probability_model.pairwise_alignment.fdr import FdrGenerator, FdrParameters, plot_fdrs, FdrData +from src.markov_probability_model.pairwise_alignment.logger import HtmlAlignmentsLogger +from typing import List, Dict + + +def run(data_dir: str, prob_gen_filepath: str, + results_dir: str, mibig_path: str, pool_sz: int, algo: List[str], + use_bw: bool, bw_iters: int, log_alignments: bool, topk: List[int]): + print('Starting alignments generation using Hidden Markov Model...') + print(f'Generating results for {data_dir}') + + res_parameters_folder = os.path.join(results_dir, 'parameters') + res_fdr_folder = os.path.join(results_dir, 'fdr') + res_alignments_folder = os.path.join(results_dir, 'alignments') + for folder in [res_parameters_folder, res_fdr_folder, res_alignments_folder]: + if not os.path.exists(folder): + os.makedirs(folder) + + print(' Loading Mibig alignments...') + ground_truth_alignments: List[PairwiseAlignmentOutputWithLogs] = \ + MibigAlignmentsLoader(mibig_path).load_alignments() + + print(' Loading data...') + data: TwoSequenceListsData = RawDataParser( + data_dir, get_alphabets_from_alignments(ground_truth_alignments)[0]).load_data() + + print(' Estimating parameters...') + parameters: 
PairwiseAlignmentHMMParameters = \ + MaxLikelihoodParametersEstimator(ground_truth_alignments, data, prob_gen_filepath, + log_dir=res_parameters_folder).calculate_parameters() + + if use_bw: + parameters = BaumWelchParametersEstimator(ground_truth_alignments, data, prob_gen_filepath, + init_params=parameters, n_iterations=bw_iters, + recalculate_transition_probs=False, + log_dir=res_parameters_folder, + pool_sz=pool_sz).calculate_parameters() + + hmm = PairwiseAlignmentHmm(parameters) + + fdr_parameters: List[FdrParameters] = [ + FdrParameters(topk, rt, None, None) for topk in topk for rt in ['mol', 'genome']] + all_fdr: List[Dict[str, FdrData]] = FdrGeneratorFromReport(data_dir, fdr_parameters).load_fdr() + + assert len(algo) > 0 + for algo in algo: + print(f' Generating {algo} alignments...') + score_augmentator = {'viterbi': NullHypothesisScoreAugmentator, + 'global_viterbi': NullHypothesisScoreAugmentator, + 'maximum_posterior_decoding': IdentityScoreAugmentator, + 'maximum_accuracy': IdentityScoreAugmentator}[algo]() + sequence_aligner = {'viterbi': Viterbi, 'global_viterbi': GlobalViterbi, + 'maximum_posterior_decoding': MaximumPosteriorDecoding, + 'maximum_accuracy': MaximumAccuracy}[algo](hmm, score_augmentator) + alignment_type = {'viterbi': ViterbiOutput, 'global_viterbi': GlobalViterbiOutput, + 'maximum_posterior_decoding': MaximumPosteriorDecodingOutput, + 'maximum_accuracy': MaximumAccuracyOutput}[algo] + logger = None + if log_alignments: + logger = HtmlAlignmentsLogger(os.path.join(res_alignments_folder, algo)) + alignments = AllPairsAlignmentGenerator[alignment_type]( + data, sequence_aligner, logger=logger, pool_sz=pool_sz).generate_alignments() + + print(f' Generating {algo} FDR...') + fdr_parameters: List[FdrParameters] = [ + FdrParameters(k, rt, + os.path.join(res_fdr_folder, f'pairs_{algo}_{rt}.csv'), + os.path.join(res_fdr_folder, f'best_pairs_{algo}_{rt}_top{k}.csv'), + ) for k in topk for rt in ['mol', 'genome'] + ] + algo_fdrs: 
List[FdrData] = FdrGenerator(alignments, fdr_parameters).generate_fdr() + for i, fdr in enumerate(algo_fdrs): + all_fdr[i][algo] = fdr + + print(' Saving FDRs...') + plot_fdrs(fdr_parameters, all_fdr, save_dir=res_fdr_folder) + + print(' Done!') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--data_dir', type=str, default='data/sequences/res_small_with_modifications') + parser.add_argument('--results_dir', type=str, default='results/res_small') + parser.add_argument('--mibig_path', type=str, default='data/sequences/mibig.csv') + parser.add_argument('--pool_sz', type=int, default=4) + + parser.add_argument('--algo', nargs='+', + default=['viterbi', 'global_viterbi', 'maximum_accuracy', 'maximum_posterior_decoding']) + parser.add_argument('--use_bw', type=bool, default=False) + parser.add_argument('--bw_iters', type=int, default=10) # 10-15 is enough + parser.add_argument('--log_alignments', type=bool, default=False) + + parser.add_argument('--topk', type=list, default=[1, 3, 5, 10]) + + args = parser.parse_args() + + run(args.data_dir, 'data/parameters/prob_gen.cfg', args.results_dir, args.mibig_path, args.pool_sz, + args.algo, args.use_bw, args.bw_iters, args.log_alignments, args.topk) diff --git a/src/markov_probability_model/pairwise_alignment/algo/global_viterbi.py b/src/markov_probability_model/pairwise_alignment/algo/global_viterbi.py new file mode 100644 index 0000000..4a97ee0 --- /dev/null +++ b/src/markov_probability_model/pairwise_alignment/algo/global_viterbi.py @@ -0,0 +1,39 @@ +from src.markov_probability_model.pairwise_alignment.sequence_aligner import ScoredPairwiseAlignmentOutput, \ + PairwiseAlignmentOutputWithLogs +from src.markov_probability_model.base.sequence import AminoacidSequence, ScoredAminoacidSequence, \ + AlignedAminoacidSequence, AlignedScoredAminoacidSequence +from src.markov_probability_model.hmm.pairwise_alignment_hmm import PairwiseAlignmentHmm +from 
class GlobalViterbiOutput(PairwiseAlignmentOutputWithLogs, ScoredPairwiseAlignmentOutput):
    """Alignment output whose score is the value supplied as ``global_viterbi_score``."""

    def __init__(self, aligned_sequence1: AlignedAminoacidSequence, aligned_sequence2: AlignedScoredAminoacidSequence,
                 global_viterbi_score: float, logs: str = ''):
        super().__init__(aligned_sequence1, aligned_sequence2, logs)
        self.global_viterbi_score = global_viterbi_score

    def score(self):
        """Return the stored score."""
        return self.global_viterbi_score


class GlobalViterbi(PairwiseSequenceAligner[GlobalViterbiOutput]):
    """Aligner that takes the alignment from Viterbi and the score from the forward matrix."""

    def __init__(self, hmm: PairwiseAlignmentHmm, sa: ScoreAugmentator):
        self._hmm = hmm
        self._sa = sa
        self._viterbi = Viterbi(hmm, sa)

    def align(self, seq1: AminoacidSequence, seq2: ScoredAminoacidSequence) -> GlobalViterbiOutput:
        """Align ``seq1`` with ``seq2``.

        The score is the log-sum over all states of the final forward cell,
        passed through the score augmentator; the aligned sequences and logs
        are those of the wrapped Viterbi aligner.
        """
        model = self._hmm
        len1, len2, n_states = len(seq1.symbols), len(seq2.symbols), len(model.states)

        forward = calculate_log_alpha(seq1, seq2, model)
        final_cell = [forward[len1, len2, state] for state in range(n_states)]
        total_log_prob = log_add_exp(final_cell)
        viterbi_result: ViterbiOutput = self._viterbi.align(seq1, seq2)

        augmented = self._sa.recalculate_score(total_log_prob, seq1, seq2, model.parameters)
        return GlobalViterbiOutput(viterbi_result.aligned_sequence1, viterbi_result.aligned_sequence2,
                                   augmented, logs=viterbi_result.logs)
class MaximumAccuracyOutput(PairwiseAlignmentOutputWithLogs, ScoredPairwiseAlignmentOutput):
    """Alignment output whose score is the value supplied as ``mpd_score``."""

    def __init__(self, aligned_sequence1: AlignedAminoacidSequence, aligned_sequence2: AlignedScoredAminoacidSequence,
                 mpd_score: float, logs: str):
        super(MaximumAccuracyOutput, self).__init__(aligned_sequence1, aligned_sequence2, logs)
        self.mpd_score = mpd_score

    def score(self):
        """Return the stored score."""
        return self.mpd_score


class MaximumAccuracy(PairwiseSequenceAligner[MaximumAccuracyOutput]):
    """Aligner that maximises the summed marginal probabilities along the path."""

    def __init__(self, hmm: PairwiseAlignmentHmm, sa: ScoreAugmentator):
        self._hmm = hmm
        self._sa = sa

    def align(self, seq1: AminoacidSequence, seq2: ScoredAminoacidSequence) -> MaximumAccuracyOutput:
        """Align ``seq1`` with ``seq2``.

        Computes the forward and backward matrices and the marginal
        probabilities, runs a dynamic program over (i, j, state) that adds
        ``marginal_prob[i, j, state]`` at every step, traces the best path
        back from cell (n, m) and returns it with the augmented best score.
        """
        hmm = self._hmm
        n, m, k = len(seq1.symbols), len(seq2.symbols), len(hmm.states)

        alpha = calculate_log_alpha(seq1, seq2, hmm)
        beta = calculate_log_beta(seq1, seq2, hmm)
        marginal_prob = calculate_marginal_prob(seq1, seq2, hmm, alpha, beta)

        # Step taken in (seq1, seq2) by each state.
        d = {hmm.M: (1, 1), hmm.A: (1, 0), hmm.B: (0, 1)}
        # Object arrays: None marks a cell/state that has not been reached.
        max_accuracy = np.full((n + 1, m + 1, k), None)
        max_accuracy[0, 0] = 0  # every state starts with score 0 at the origin
        prev_state = np.full((n + 1, m + 1, k), None)
        for i in range(n + 1):
            for j in range(m + 1):
                for v in range(k):
                    di, dj = d[hmm.states[v]]
                    if di > i or dj > j:
                        # The predecessor cell would lie outside the matrix.
                        continue
                    for prev in range(k):
                        score = marginal_prob[i, j, v]
                        if max_accuracy[i - di, j - dj, prev] is None:
                            continue
                        new_score = score + max_accuracy[i - di, j - dj, prev]
                        # Strict '<' keeps the first predecessor on ties.
                        if max_accuracy[i, j, v] is None or max_accuracy[i, j, v] < new_score:
                            max_accuracy[i, j, v] = new_score
                            prev_state[i, j, v] = prev

        def log_calculate_score(v, i, j):
            # Only steps in state index 0 get a log line; others log ''.
            if v == 0:
                return 'P_M({}, {}) = {}'.format(i, j, marginal_prob[i, j, v])
            else:
                return ''

        # Choose the best-scoring state in the final cell.
        # NOTE(review): a None entry here would make '>' raise TypeError;
        # presumably every state is reachable at (n, m) - confirm.
        i, j, v = n, m, 0
        max_score = max_accuracy[i, j, v]
        for tmp_v in range(k):
            if max_accuracy[i, j, tmp_v] > max_accuracy[i, j, v]:
                v = tmp_v
                max_score = max_accuracy[i, j, tmp_v]
        states = []
        logs = []
        # Walk the back-pointers until a cell without a predecessor is reached.
        while prev_state[i, j, v] is not None:
            states.append(v)
            logs.append(log_calculate_score(v, i, j))
            di, dj = d[hmm.states[v]]
            prev = prev_state[i, j, v]
            v, i, j = prev, i - di, j - dj
        logs.reverse()
        states.reverse()
        assert i == 0 and j == 0, 'Internal error'

        # Replay the path forwards (i and j are 0 here) to emit symbols/gaps.
        aligned1: List[AlignedAminoacid] = []
        aligned2: List[AlignedScoredAminoacid] = []
        for st in states:
            di, dj = d[hmm.states[st]]
            aligned1.append(Gap() if di == 0 else seq1.symbols[i])
            aligned2.append(Gap() if dj == 0 else seq2.symbols[j])
            i += di
            j += dj

        aligned_seq1: AlignedAminoacidSequence = AlignedAminoacidSequence(seq1.sequence_id, aligned1)
        aligned_seq2: AlignedScoredAminoacidSequence = AlignedScoredAminoacidSequence(seq2.sequence_id, aligned2)
        logs.append('Marginal probs:')
        logs.append(' '.join(map(str, log_marginal_prob_for_alignment(aligned_seq1, aligned_seq2, hmm, marginal_prob))))
        return MaximumAccuracyOutput(aligned_seq1, aligned_seq2,
                                     self._sa.recalculate_score(max_score, seq1, seq2, hmm.parameters),
                                     logs='\n'.join(logs))
class MaximumPosteriorDecodingOutput(PairwiseAlignmentOutputWithLogs, ScoredPairwiseAlignmentOutput):
    """Alignment output whose score is the value supplied as ``mpd_score``."""

    def __init__(self, aligned_sequence1: AlignedAminoacidSequence, aligned_sequence2: AlignedScoredAminoacidSequence,
                 mpd_score: float, logs: str):
        super(MaximumPosteriorDecodingOutput, self).__init__(aligned_sequence1, aligned_sequence2, logs)
        self.mpd_score = mpd_score

    def score(self):
        """Return the stored score."""
        return self.mpd_score


class MaximumPosteriorDecoding(PairwiseSequenceAligner[MaximumPosteriorDecodingOutput]):
    """Aligner maximising summed posterior scores of matches and gaps."""

    def __init__(self, hmm: PairwiseAlignmentHmm, sa: ScoreAugmentator):
        self._hmm = hmm
        self._sa = sa

    def align(self, seq1: AminoacidSequence, seq2: ScoredAminoacidSequence) -> MaximumPosteriorDecodingOutput:
        """Align ``seq1`` with ``seq2``.

        A step in state index 0 scores ``marginal_prob[i, j, 0]``; a step in
        state index 1 scores the sum of ``marginal_prob[i, :, 1]``; a step in
        any other state ``v`` scores the sum of ``marginal_prob[:, j, v]``.
        The best path ending in cell (n, m) is traced back and returned with
        the augmented score.
        """
        hmm = self._hmm
        n, m, k = len(seq1.symbols), len(seq2.symbols), len(hmm.states)

        alpha = calculate_log_alpha(seq1, seq2, hmm)
        beta = calculate_log_beta(seq1, seq2, hmm)
        marginal_prob = calculate_marginal_prob(seq1, seq2, hmm, alpha, beta)

        # Row/column sums depend only on (state, i) or (state, j); memoise
        # them instead of re-summing a slice for every (i, j, v, prev).
        gap_sums = {}

        def step_score(v, i, j):
            if v == 0:
                return marginal_prob[i, j, v]
            key = (v, i) if v == 1 else (v, j)
            if key not in gap_sums:
                gap_sums[key] = marginal_prob[i, :, v].sum() if v == 1 else marginal_prob[:, j, v].sum()
            return gap_sums[key]

        # Step taken in (seq1, seq2) by each state.
        d = {hmm.M: (1, 1), hmm.A: (1, 0), hmm.B: (0, 1)}
        # Object arrays: None marks a cell/state that has not been reached.
        max_accuracy = np.full((n + 1, m + 1, k), None)
        max_accuracy[0, 0] = 0
        prev_state = np.full((n + 1, m + 1, k), None)
        for i in range(n + 1):
            for j in range(m + 1):
                for v in range(k):
                    di, dj = d[hmm.states[v]]
                    if di > i or dj > j:
                        continue
                    score = step_score(v, i, j)
                    for prev in range(k):
                        new_score = max_accuracy[i - di, j - dj, prev]
                        if new_score is None:
                            continue
                        new_score += score
                        # Strict '<' keeps the first predecessor on ties.
                        if max_accuracy[i, j, v] is None or max_accuracy[i, j, v] < new_score:
                            max_accuracy[i, j, v] = new_score
                            prev_state[i, j, v] = prev

        def log_calculate_score(v, i, j):
            if v == 0:
                return 'P_M({}, {}) = {}'.format(i, j, step_score(v, i, j))
            elif v == 1:
                return 'P_A({}) = {}'.format(i, step_score(v, i, j))
            else:
                return 'P_B({}) = {}'.format(j, step_score(v, i, j))

        # Best end state; unreached (None) entries are skipped instead of
        # being compared, which would raise TypeError.
        i, j, v = n, m, 0
        max_score = max_accuracy[i, j, v]
        for tmp_v in range(k):
            candidate = max_accuracy[i, j, tmp_v]
            if candidate is not None and (max_score is None or candidate > max_score):
                v = tmp_v
                max_score = candidate
        states = []
        logs = []
        # Walk the back-pointers until a cell without a predecessor is reached.
        while prev_state[i, j, v] is not None:
            states.append(v)
            logs.append(log_calculate_score(v, i, j))
            di, dj = d[hmm.states[v]]
            prev = prev_state[i, j, v]
            v, i, j = prev, i - di, j - dj
        logs.reverse()
        states.reverse()
        assert i == 0 and j == 0, 'Internal error'

        # Replay the path forwards (i and j are 0 here) to emit symbols/gaps.
        aligned1: List[AlignedAminoacid] = []
        aligned2: List[AlignedScoredAminoacid] = []
        for st in states:
            di, dj = d[hmm.states[st]]
            aligned1.append(Gap() if di == 0 else seq1.symbols[i])
            aligned2.append(Gap() if dj == 0 else seq2.symbols[j])
            i += di
            j += dj

        aligned_seq1: AlignedAminoacidSequence = AlignedAminoacidSequence(seq1.sequence_id, aligned1)
        aligned_seq2: AlignedScoredAminoacidSequence = AlignedScoredAminoacidSequence(seq2.sequence_id, aligned2)
        logs.append('Marginal probs:')
        logs.append(' '.join(map(str, log_marginal_prob_for_alignment(aligned_seq1, aligned_seq2, hmm, marginal_prob))))
        # Reuse the sequences built above rather than constructing them twice.
        return MaximumPosteriorDecodingOutput(aligned_seq1, aligned_seq2,
                                              self._sa.recalculate_score(max_score, seq1, seq2, hmm.parameters),
                                              logs='\n'.join(logs))
def calculate_log_marginal_prob(seq1: AminoacidSequence, seq2: ScoredAminoacidSequence, hmm: PairwiseAlignmentHmm,
                                alpha: np.ndarray, beta: np.ndarray) -> np.ndarray:
    """Combine forward and backward log matrices into log marginals.

    :param seq1: first sequence; only its length is used.
    :param seq2: second sequence; only its length is used.
    :param hmm: model; only its number of states is used.
    :param alpha: forward log matrix, indexed ``[i, j, state]``.
    :param beta: backward log matrix, indexed ``[i, j, state]``.
    :return: float array of shape ``(n + 1, m + 1, k)`` holding
        ``alpha + beta - log_add_exp(alpha[n, m, :])``.
    """
    n, m, k = len(seq1), len(seq2), len(hmm.states)
    sum_alpha = log_add_exp([alpha[n, m, x] for x in range(k)])
    # One array expression instead of a Python loop over every (i, j, state).
    window = (slice(0, n + 1), slice(0, m + 1), slice(0, k))
    return np.asarray(alpha[window] + beta[window] - sum_alpha, dtype=float)
def log_marginal_prob_for_alignment(sequence1: AlignedAminoacidSequence, sequence2: AlignedScoredAminoacidSequence,
                                    hmm: PairwiseAlignmentHmm,
                                    marginal_prob: Optional[np.ndarray] = None) -> List[float]:
    """Collect the ``marginal_prob`` entry visited by each alignment column.

    When ``marginal_prob`` is omitted it is computed from the two sequences
    with their gaps removed. Each column is classified by where its gap is
    (state A if the second symbol is a gap, else B if the first is, else M),
    the (i, j) position advances by that state's step, and the matrix entry
    for the new position and state is appended.
    """
    if marginal_prob is None:
        ungapped1 = AminoacidSequence(sequence1.sequence_id, _get_sequence_from_aligned(sequence1))
        ungapped2 = ScoredAminoacidSequence(sequence2.sequence_id, _get_sequence_from_aligned(sequence2))
        forward = calculate_log_alpha(ungapped1, ungapped2, hmm)
        backward = calculate_log_beta(ungapped1, ungapped2, hmm)
        marginal_prob = calculate_marginal_prob(ungapped1, ungapped2, hmm, forward, backward)

    steps = {hmm.M: (1, 1), hmm.A: (1, 0), hmm.B: (0, 1)}
    row, col = 0, 0
    visited: List[float] = []
    for column in range(len(sequence1)):
        upper: AlignedAminoacid = sequence1.symbols[column]
        lower: AlignedScoredAminoacid = sequence2.symbols[column]
        if lower == Gap():
            state = hmm.A
        elif upper == Gap():
            state = hmm.B
        else:
            state = hmm.M
        row += steps[state][0]
        col += steps[state][1]
        visited.append(marginal_prob[row, col, hmm.state_index(state)])
    return visited
class ViterbiOutput(PairwiseAlignmentOutputWithLogs, ScoredPairwiseAlignmentOutput):
    """Alignment output whose score is the value supplied as ``viterbi_score``."""

    def __init__(self, aligned_sequence1: AlignedAminoacidSequence, aligned_sequence2: AlignedScoredAminoacidSequence,
                 viterbi_score: float, logs: str):
        super(ViterbiOutput, self).__init__(aligned_sequence1, aligned_sequence2, logs)
        self.viterbi_score = viterbi_score

    def score(self):
        """Return the stored score."""
        return self.viterbi_score


class Viterbi(PairwiseSequenceAligner[ViterbiOutput]):
    """Aligner returning the single most probable state path through the HMM."""

    def __init__(self, hmm: PairwiseAlignmentHmm, sa: ScoreAugmentator):
        self._hmm = hmm
        self._sa = sa

    def align(self, seq1: AminoacidSequence, seq2: ScoredAminoacidSequence) -> ViterbiOutput:
        """Align ``seq1`` with ``seq2`` along the best-scoring state path.

        Empty sequences are accepted: states whose first step does not fit
        are never initialised, and if no state reaches cell (n, m) (both
        sequences empty) an empty alignment is returned with a raw score of
        ``-inf`` passed to the score augmentator.
        """
        hmm = self._hmm
        n, m, k = len(seq1.symbols), len(seq2.symbols), len(hmm.states)

        # Object arrays: None marks a cell/state that has not been reached.
        f = np.full((n + 1, m + 1, k), None)
        prev_state = np.full((n + 1, m + 1, k), None)

        # Step taken in (seq1, seq2) by each state.
        d = {hmm.M: (1, 1), hmm.A: (1, 0), hmm.B: (0, 1)}

        # Start cells: each state emits its first symbol(s) from the origin.
        for st in hmm.states:
            i, j = d[st]
            if i > n or j > m:
                # The state needs a symbol that an empty sequence cannot give.
                continue
            s1 = seq1.symbols[0] if i == 1 else Gap()
            s2 = seq2.symbols[0] if j == 1 else Gap()
            f[i, j, hmm.state_index(st)] = \
                my_log(hmm.start_prob(st)) + my_log(hmm.observation_prob(st, PairwiseAlignmentHmmObservation(s1, s2)))

        for i in range(n + 1):
            for j in range(m + 1):
                for st in hmm.states:
                    di, dj = d[st]
                    if i < di or j < dj:
                        continue
                    # The emission and target index do not depend on ``frm``.
                    s1 = seq1.symbols[i - 1] if di == 1 else Gap()
                    s2 = seq2.symbols[j - 1] if dj == 1 else Gap()
                    obs_log = my_log(hmm.observation_prob(st, PairwiseAlignmentHmmObservation(s1, s2)))
                    st_idx: int = hmm.state_index(st)
                    for frm in hmm.states:
                        sc = my_log(hmm.transition_prob(frm, st)) + obs_log
                        frm_score = f[i - di, j - dj, hmm.state_index(frm)]
                        if frm_score is None:
                            continue
                        # Strict '<' keeps the first predecessor on ties.
                        if f[i, j, st_idx] is None or f[i, j, st_idx] < frm_score + sc:
                            f[i, j, st_idx] = frm_score + sc
                            prev_state[i, j, st_idx] = frm

        # Best end state among those that actually reach (n, m).
        i, j, best_st, best_score = n, m, None, -np.inf
        for st in hmm.states:
            end_score = f[i, j, hmm.state_index(st)]
            if end_score is None:
                continue
            if best_st is None or end_score > best_score:
                best_st, best_score = st, end_score

        states = [] if best_st is None else [best_st]
        while best_st is not None and prev_state[i, j, hmm.state_index(best_st)] is not None:
            prev_st = prev_state[i, j, hmm.state_index(best_st)]
            states.append(prev_st)
            i -= d[best_st][0]
            j -= d[best_st][1]
            best_st = prev_st

        states.reverse()
        assert i <= 1 and j <= 1, "Internal error"

        def _log_observation_score(state: PairwiseAlignmentHmmState, symb1: Symbol, symb2: Symbol):
            name = 'P' if state == hmm.M else 'Q_a' if state == hmm.A else 'Q_b'
            return 'log({}({}, {})) = log({}) = {}'.format(
                name, symb1, symb2, hmm.observation_prob(state, PairwiseAlignmentHmmObservation(symb1, symb2)),
                my_log(hmm.observation_prob(state, PairwiseAlignmentHmmObservation(symb1, symb2))))

        def _log_start_score(state: PairwiseAlignmentHmmState):
            return 'log(Mu({})) = log({}) = {}'.format(state, hmm.start_prob(state),
                                                       my_log(hmm.start_prob(state)))

        def _log_transition_score(prev: PairwiseAlignmentHmmState, nxt: PairwiseAlignmentHmmState):
            return 'log(Tau({} -> {})) = log({}) = {}'.format(prev, nxt, hmm.transition_prob(prev, nxt),
                                                              my_log(hmm.transition_prob(prev, nxt)))

        # Replay the path forwards to emit symbols/gaps and the score log.
        i, j = 0, 0
        aligned1: List[AlignedAminoacid] = []
        aligned2: List[AlignedScoredAminoacid] = []
        logs = []
        # ``before`` is the preceding state (None for the first step); a
        # separate name keeps the back-pointer array from being shadowed.
        for before, st in zip([None] + states[:-1], states):
            s1 = Gap() if d[st][0] == 0 else seq1.symbols[i]
            s2 = Gap() if d[st][1] == 0 else seq2.symbols[j]
            if before is None:
                logs.append(_log_start_score(st))
            else:
                logs.append(_log_transition_score(before, st))
            logs.append(_log_observation_score(st, s1, s2))
            aligned1.append(s1)
            aligned2.append(s2)
            i += d[st][0]
            j += d[st][1]

        aligned_seq1: AlignedAminoacidSequence = AlignedAminoacidSequence(seq1.sequence_id, aligned1)
        aligned_seq2: AlignedScoredAminoacidSequence = AlignedScoredAminoacidSequence(seq2.sequence_id, aligned2)

        return ViterbiOutput(aligned_seq1, aligned_seq2,
                             self._sa.recalculate_score(best_score, seq1, seq2, hmm.parameters),
                             logs='\n'.join(logs))
class AllPairsAlignmentGenerator(AlignmentGenerator, Generic[O]):
    """Align every sequence of the first list against every sequence of the second.

    The work is split by first-list sequence and distributed over a
    ``multiprocessing.Pool`` of ``pool_sz`` workers.
    """

    def __init__(self, data: TwoSequenceListsData, sequence_aligner: PairwiseSequenceAligner[O],
                 logger: Optional[AlignmentsLogger[O]] = None, pool_sz: int = 1):
        self._data = data
        self._sequence_aligner = sequence_aligner
        self._logger = logger
        self._pool_sz = pool_sz

    def generate_alignments(self) -> List[O]:
        """Return all pairwise alignments, ordered by first sequence, then second."""
        first_sequences = self._data.sequences1
        with Pool(self._pool_sz) as workers:
            per_sequence = workers.imap(self._generate_alignments_for_seq1, first_sequences)
            # Materialise inside the ``with`` so the pool is still alive while consuming.
            alignment_lists: List[List[O]] = list(tqdm(per_sequence, total=len(first_sequences)))
        flattened: List[O] = []
        for chunk in alignment_lists:
            flattened.extend(chunk)
        return flattened

    def _generate_alignments_for_seq1(self, seq1: AminoacidSequence) -> List[O]:
        """Align ``seq1`` against each second-list sequence, logging each result if a logger is set."""
        results: List[O] = []
        for seq2 in self._data.sequences2:
            aligned = self._sequence_aligner.align(seq1, seq2)
            results.append(aligned)
            if self._logger is not None:
                self._logger.log_alignment(aligned)
        return results
class FdrGenerator:
    """Compute false-discovery-rate curves from a list of scored alignments."""

    def __init__(self, alignments: List[ScoredPairwiseAlignmentOutput], fdr_parameters: List[FdrParameters]):
        self._fdr_parameters: List[FdrParameters] = fdr_parameters
        # Best score first. The sort is stable, so equal scores keep their input order.
        self._alignments: List[ScoredPairwiseAlignmentOutput] = sorted(alignments, key=lambda x: -x.score())

    def generate_fdr(self) -> List[FdrData]:
        """Return one FDR curve per configured ``FdrParameters``."""
        return [self._generate_single_fdr(p) for p in self._fdr_parameters]

    @staticmethod
    def _is_correct(alignment: ScoredPairwiseAlignmentOutput) -> bool:
        """An alignment is correct when both sequences share the same base sequence id."""
        return alignment.aligned_sequence1.base_sequence_id == alignment.aligned_sequence2.base_sequence_id

    @staticmethod
    def _dump_alignments(alignments: List[ScoredPairwiseAlignmentOutput], path: str) -> None:
        """Write ids, score and both aligned rows of ``alignments`` to a CSV file."""
        pd.DataFrame({
            'Seq1': [a.aligned_sequence1.sequence_id for a in alignments],
            'Seq2': [a.aligned_sequence2.sequence_id for a in alignments],
            'Score': [a.score() for a in alignments],
            'Aligned1': [' '.join(map(str, a.aligned_sequence1.symbols)) for a in alignments],
            'Aligned2': [' '.join(map(str, a.aligned_sequence2.symbols)) for a in alignments],
        }).to_csv(path)

    def _generate_single_fdr(self, p: FdrParameters) -> FdrData:
        """Build the FDR curve for one parameter set.

        Alignments are grouped by the base id of the first sequence when
        ``p.relative_to == 'mol'`` and of the second sequence otherwise. Each
        group contributes one representative: the first correct alignment among
        its top ``p.topk`` (extended over score ties), or its best alignment if
        none is correct. The curve holds the running fraction of incorrect
        representatives, groups being visited from best to worst top score.

        Raises:
            ValueError: if ``p.topk`` is smaller than 1.
        """
        if p.topk < 1:
            # An empty cut has no representative; previously this surfaced as IndexError.
            raise ValueError(f'topk must be at least 1, got {p.topk}')

        groups: Dict[object, List[ScoredPairwiseAlignmentOutput]] = defaultdict(list)
        for alignment in self._alignments:
            base_struct_id = alignment.aligned_sequence1.base_sequence_id if p.relative_to == 'mol' else \
                alignment.aligned_sequence2.base_sequence_id
            groups[base_struct_id].append(alignment)
        # Every group is already sorted by score, so x[0] is its best alignment.
        groups_list: List[List[ScoredPairwiseAlignmentOutput]] = list(groups.values())
        groups_list.sort(key=lambda x: -x[0].score())

        if p.pairs_df_logpath is not None:
            self._dump_alignments([a for g in groups_list for a in g], p.pairs_df_logpath)

        correct: int = 0
        incorrect: int = 0
        fdr: List[float] = []
        result_alignments: List[ScoredPairwiseAlignmentOutput] = []
        for alignments in groups_list:
            cut = min(len(alignments), p.topk)
            # Extend the cut while the next alignment ties with the last one kept.
            while cut < len(alignments) and alignments[cut - 1].score() == alignments[cut].score():
                cut += 1
            cut_alignments: List[ScoredPairwiseAlignmentOutput] = alignments[:cut]
            res: ScoredPairwiseAlignmentOutput = next(
                (a for a in cut_alignments if self._is_correct(a)), cut_alignments[0])
            result_alignments.append(res)
            if self._is_correct(res):
                correct += 1
            else:
                incorrect += 1
            fdr.append(incorrect / (correct + incorrect))

        if p.best_pairs_df_logpath is not None:
            self._dump_alignments(result_alignments, p.best_pairs_df_logpath)

        return FdrData(fdr)
class HtmlAlignmentsLogger(AlignmentsLogger[PairwiseAlignmentOutputWithLogs]):
    """Write each alignment to its own HTML file inside ``log_dir``.

    NOTE(review): the HTML markup in the string literals below was missing from
    the reviewed text (the literals were empty and the '%'-format had two
    arguments for one placeholder, which raises TypeError). The tags here are a
    reconstruction - confirm them against the intended page layout.
    """

    def __init__(self, log_dir: str):
        self._log_dir = log_dir
        # exist_ok avoids the check-then-create race when several workers start together.
        os.makedirs(log_dir, exist_ok=True)

    @staticmethod
    def _get_colour_by_score(score: float):
        """Map a score to a colour: above 90 green, above 60 yellow, otherwise red."""
        if score > 90.0:
            return 'green'
        elif score > 60.0:
            return 'yellow'
        else:
            return 'red'

    def _get_string_for_output(self, s: AlignedScoredAminoacid) -> str:
        """Render one symbol of the scored row as a table cell.

        A gap is printed as is; any other symbol is printed as the text before
        its first '(' followed by its score, coloured by ``_get_colour_by_score``.
        """
        if s == Gap():
            return '<td>' + str(s) + '</td>'
        return '<td>' + str(s).split('(')[0] + \
               '<sub style="color:%s">%s</sub></td>' % (
                   self._get_colour_by_score(s.score), s.score)

    def log_alignment(self, alignment: PairwiseAlignmentOutputWithLogs):
        """Write ``<id1>_<id2>.html`` with both aligned rows and the alignment logs."""
        id1: SequenceId = alignment.aligned_sequence1.sequence_id
        id2: SequenceId = alignment.aligned_sequence2.sequence_id
        logpath = os.path.join(self._log_dir, f'{id1}_{id2}.html')

        with open(logpath, 'w') as f:
            f.write('<html>')
            f.write(f'<h2>{id1} & {id2}</h2>')
            f.write('<table>')
            f.write('<tr>')
            for ind in range(len(alignment)):
                f.write(f'<td>{str(alignment.aligned_sequence1.symbols[ind])}</td>')
            f.write('</tr><tr>')
            for ind in range(len(alignment)):
                f.write(self._get_string_for_output(alignment.aligned_sequence2.symbols[ind]))
            f.write('</tr>')
            f.write('</table>')
            # One paragraph per log line.
            f.write('<h3>Logs:</h3>' + ''.join('<p>' + p + '</p>' for p in alignment.logs.split('\n')))
            f.write('</html>')
aligned_sequence1: AlignedAminoacidSequence, + aligned_sequence2: AlignedScoredAminoacidSequence): + self.aligned_sequence1 = aligned_sequence1 + self.aligned_sequence2 = aligned_sequence2 + assert len(self.aligned_sequence1) == len(self.aligned_sequence2) + + def __len__(self): + return len(self.aligned_sequence1) + + +class PairwiseAlignmentOutputWithLogs(PairwiseAlignmentOutput): + def __init__(self, aligned_sequence1: AlignedAminoacidSequence, + aligned_sequence2: AlignedScoredAminoacidSequence, logs: str): + super().__init__(aligned_sequence1, aligned_sequence2) + self.logs = logs + + +class ScoredPairwiseAlignmentOutput(abc.ABC, PairwiseAlignmentOutput): + @abc.abstractmethod + def score(self): + pass + + +O = TypeVar('O') + + +class PairwiseSequenceAligner(abc.ABC, Generic[O]): + @abc.abstractmethod + def align(self, seq1: AminoacidSequence, seq2: ScoredAminoacidSequence) -> O: + pass diff --git a/src/markov_probability_model/parameters/baum_welch_parameters_estimator.py b/src/markov_probability_model/parameters/baum_welch_parameters_estimator.py new file mode 100644 index 0000000..0ba42b6 --- /dev/null +++ b/src/markov_probability_model/parameters/baum_welch_parameters_estimator.py @@ -0,0 +1,251 @@ +import numpy as np +import os + +from src.markov_probability_model.parameters.parameters_calculator import ParametersCalculator +from src.markov_probability_model.data_loader.alignments_loader import PairwiseAlignmentOutputWithLogs +from src.markov_probability_model.data_loader.data_loader import TwoSequenceListsData +from src.markov_probability_model.base.alphabet import Gap, AminoacidAlphabet, ScoredAminoacidAlphabet +from src.markov_probability_model.base.sequence import AminoacidSequence, ScoredAminoacidSequence +from src.markov_probability_model.base.utils import my_log, my_exp, log_add_exp +from src.markov_probability_model.parameters.utils import estimate_p_with_modifications, parse_nerpa_config, \ + estimate_qa_qb, array_to_dict, same_modifications, 
def _calculate_gamma(seq1: AminoacidSequence, seq2: ScoredAminoacidSequence, hmm: PairwiseAlignmentHmm,
                     alpha: np.ndarray, beta: np.ndarray) -> np.ndarray:
    """Combine forward and backward log-values into log state posteriors.

    For every cell ``(t, s)`` and state index ``i`` the result holds
    ``alpha[t, s, i] + beta[t, s, i] - Z``, where ``Z`` is the log-sum-exp of
    ``alpha`` over all states at the final cell ``(len(seq1), len(seq2))``.
    """
    rows, cols = len(seq1), len(seq2)
    state_ids = [hmm.state_index(state) for state in hmm.states]
    # Normaliser taken from the last forward cell.
    log_total = log_add_exp([alpha[rows, cols, idx] for idx in state_ids])

    gamma = np.full((rows + 1, cols + 1, len(hmm.states)), -np.inf)
    for t in range(rows + 1):
        for s in range(cols + 1):
            for idx in state_ids:
                gamma[t, s, idx] = alpha[t, s, idx] + beta[t, s, idx] - log_total
    return gamma
def _estimate_f(alphabet: List[str],
                seqs1: List[AminoacidSequence], seqs2: List[ScoredAminoacidSequence],
                gammas: List[np.ndarray], hmm: PairwiseAlignmentHmm,
                modification1, methylation1, modification2, methylation2) -> Dict[str, float]:
    """Estimate f(a) for one combination of modifications, from match-state posteriors.

    For every position pair whose two symbols carry the same name, the log
    posterior of the match state (``gammas``) is accumulated. It is added to
    the count of that name only when the first symbol has
    ``(modification1, methylation1)`` and the second has
    ``(modification2, methylation2)``. Counts start from ``PSEUDOCOUNT`` and
    all arithmetic is done in log space.

    Returns:
        Mapping from amino-acid name to its estimated probability.
    """
    res: Dict[str, float] = {a: my_log(PSEUDOCOUNT) for a in alphabet}
    div_term = my_log(len(alphabet) * PSEUDOCOUNT)
    match_idx = hmm.state_index(hmm.M)
    for seq1, seq2, gam in zip(seqs1, seqs2, gammas):
        for t in range(1, len(seq1) + 1):
            symb1 = seq1.symbols[t - 1]
            for s in range(1, len(seq2) + 1):
                symb2 = seq2.symbols[s - 1]
                # Compare name with name. The previous code compared a name with the
                # symbol object itself, so the "same name" filter presumably rejected
                # every pair and f never moved away from the pseudocount prior.
                if symb1.name != symb2.name:
                    continue
                cur_gam = gam[t, s, match_idx]
                if same_modifications(symb1, modification1, methylation1) and \
                        same_modifications(symb2, modification2, methylation2):
                    res[symb1.name] = log_add_exp([res[symb1.name], cur_gam])
                # NOTE(review): the normaliser collects every same-name pair, not only
                # those with the requested modifications, mirroring how _estimate_g
                # normalises - confirm against the original indentation.
                div_term = log_add_exp([div_term, cur_gam])
    for a in alphabet:
        res[a] = my_exp(res[a] - div_term)
    return res
seqs2: List[ScoredAminoacidSequence], + gammas: List[np.ndarray], hmm: PairwiseAlignmentHmm, + modification, methylation) -> Dict[str, float]: + res: Dict[str, float] = {a: my_log(PSEUDOCOUNT) for a in alphabet} + div_term = my_log(len(alphabet) * PSEUDOCOUNT) + for seq1, seq2, gam in zip(seqs1, seqs2, gammas): + for t in range(1, len(seq1) + 1): + for s in range(1, len(seq2) + 1): + cur_gam = gam[t, s, hmm.state_index(hmm.M)] + if seq1.symbols[t - 1].name == seq2.symbols[s - 1].name: + continue + if same_modifications(seq1.symbols[t - 1], modification, methylation): + res[seq1.symbols[t - 1].name] = log_add_exp([res[seq1.symbols[t - 1].name], cur_gam]) + if same_modifications(seq2.symbols[s - 1], modification, methylation): + res[seq2.symbols[s - 1].name] = log_add_exp([res[seq2.symbols[s - 1].name], cur_gam]) + div_term = log_add_exp([div_term, cur_gam]) + for a in alphabet: + res[a] = my_exp(res[a] - div_term) + return res + + +class BaumWelchParametersEstimator(ParametersCalculator): + def __init__(self, alignments: List[PairwiseAlignmentOutputWithLogs], + data: TwoSequenceListsData, + nerpa_cfg_path: str, + init_params: PairwiseAlignmentHMMParameters, + n_iterations: int, + recalculate_transition_probs: bool = True, + log_dir: Optional[str] = None, + pool_sz: int = 1): + self._alignments = alignments + self._data = data + self._nerpa_cfg = parse_nerpa_config(nerpa_cfg_path) + self._cur_params = init_params + self._hmm = PairwiseAlignmentHmm(self._cur_params) + self._n_iterations = n_iterations + self._cur_iter = 0 + self._recalculate_transition_probs = recalculate_transition_probs + self._log_dir = log_dir + self._omega_a, self._omega_b = get_alphabets_from_data(self._data, self._alignments) + self._alphabet: List[str] = get_names_alphabet(self._omega_a, self._omega_b) + self._pool_sz = pool_sz + print('Will use Baum-Welch parameters estimator') + + def calculate_parameters(self) -> PairwiseAlignmentHMMParameters: + for _ in range(self._n_iterations): + 
print(f'Iteration {self._cur_iter}') + new_params = self._calculate_parameters_on_iteration() + d = _calculate_distance(new_params, self._cur_params) + print(f' distance = {d}') + self._cur_params = new_params + self._hmm = PairwiseAlignmentHmm(self._cur_params) + return self._cur_params + + def _calculate_alpha_beta_gamma_eps(self, alignment: PairwiseAlignmentOutputWithLogs) -> Tuple: + seq1 = AminoacidSequence(alignment.aligned_sequence1.sequence_id, + [i for i in alignment.aligned_sequence1.symbols if i != Gap()]) + seq2 = ScoredAminoacidSequence(alignment.aligned_sequence2.sequence_id, + [i for i in alignment.aligned_sequence2.symbols if i != Gap()]) + alpha = calculate_log_alpha(seq1, seq2, self._hmm) + beta = calculate_log_beta(seq1, seq2, self._hmm) + gamma = _calculate_gamma(seq1, seq2, self._hmm, alpha, beta) + eps = _calculate_eps(seq1, seq2, self._hmm, alpha, beta) + return seq1, seq2, alpha, beta, gamma, eps + + def _calculate_f(self, case: Tuple[str, bool, str, bool, List, List, List[np.ndarray]]): + modification1, methylation1, modification2, methylation2, seqs1, seqs2, gammas = case + return _estimate_f(self._alphabet, seqs1, seqs2, gammas, self._hmm, + modification1, methylation1, modification2, methylation2) + + def _calculate_g(self, case: Tuple[str, bool, List, List, List[np.ndarray]]): + modification, methylation, seqs1, seqs2, gammas = case + return _estimate_g(self._alphabet, seqs1, seqs2, gammas, self._hmm, modification, methylation) + + def _calculate_parameters_on_iteration(self) -> PairwiseAlignmentHMMParameters: + self._cur_iter += 1 + + alignments: List[PairwiseAlignmentOutputWithLogs] = [a for a in self._alignments if + _check_observation(a, self._omega_a, self._omega_b)] + with Pool(self._pool_sz) as p: + prob_data: List[Tuple] = list( + tqdm(p.imap(self._calculate_alpha_beta_gamma_eps, alignments), + total=len(alignments), desc='Calculating marginal probs')) + + seqs1, seqs2, alphas, betas, gammas, epss = map(list, zip(*prob_data)) + 
if self._recalculate_transition_probs: + mu, tau = np.array(_calculate_pi(gammas, self._hmm)), np.array( + _calculate_a(seqs1, seqs2, gammas, epss, self._hmm)) + else: + mu, tau = self._cur_params.mu, self._cur_params.tau + + # 1. Estimate g(a, mod1, meth1, mod2, meth2) = P(a, mod1, meth1, mod2, meth2 | mismatch) + # 2. Estimate f(a, mod1, meth1, mod2, meth2) = P(a, mod1, meth1, mod2, meth2 | match) + f = {} + f_cases = [(modification1, methylation1, modification2, methylation2, seqs1, seqs2, gammas) + for modification1 in ['@D', '@L'] + for methylation1 in [False, True] + for modification2 in ['@D', '@L'] + for methylation2 in [False, True]] + with Pool(self._pool_sz) as p: + f_data: List[Tuple] = list( + tqdm(p.imap(self._calculate_f, f_cases), + total=len(f_cases), desc='Estimating f')) + for i, (modification1, methylation1, modification2, methylation2, _, _, _) in enumerate(f_cases): + f[(modification1, methylation1, modification2, methylation2)] = f_data[i] + + g = {} + g_cases = [(modification, methylation, seqs1, seqs2, gammas) + for modification in ['@D', '@L'] + for methylation in [False, True]] + with Pool(self._pool_sz) as p: + g_data: List[Tuple] = list( + tqdm(p.imap(self._calculate_g, g_cases), + total=len(g_cases), desc='Estimating g')) + for i, (modification, methylation, _, _, _) in enumerate(g_cases): + g[(modification, methylation)] = g_data[i] + + p_score = generate_p_score(self._nerpa_cfg, self._data) + p_mods = generate_p_mods(self._data) + p = estimate_p_with_modifications(self._omega_a, self._omega_b, self._nerpa_cfg, f, g, p_score, p_mods) + q_a, q_b = estimate_qa_qb(p) + p, q_a, q_b = array_to_dict(self._omega_a, self._omega_b, p, q_a, q_b) + res = PairwiseAlignmentHMMParameters(self._omega_a, self._omega_b, mu=mu, tau=tau, p=p, q_a=q_a, q_b=q_b) + if self._log_dir is not None: + res.log_to(os.path.join(self._log_dir, f'baum_welch_parameters_iter{self._cur_iter}.txt')) + return res + + +def _calculate_distance(p1: 
def _check_observation(alignment: PairwiseAlignmentOutputWithLogs, omega_a: AminoacidAlphabet,
                       omega_b: ScoredAminoacidAlphabet) -> bool:
    """Tell whether every non-gap symbol of the alignment is named in its alphabet.

    Symbols of the first aligned sequence are looked up by name in ``omega_a``,
    those of the second in ``omega_b``; gaps are ignored.
    """
    # Build the name sets once instead of rebuilding a list for every symbol.
    names_a = {a.name for a in omega_a}
    names_b = {b.name for b in omega_b}
    gap = Gap()
    for s in alignment.aligned_sequence1.symbols:
        if s != gap and s.name not in names_a:
            return False
    for s in alignment.aligned_sequence2.symbols:
        if s != gap and s.name not in names_b:
            return False
    return True
MaxLikelihoodParametersEstimator: + def __init__(self, alignments: List[PairwiseAlignmentOutputWithLogs], + data: TwoSequenceListsData, nerpa_cfg_path: str, + estimate_transition_probs: bool = False, + log_dir: Optional[str] = None): + self._alignments = alignments + self._data = data + self._nerpa_cfg = parse_nerpa_config(nerpa_cfg_path) + self._estimate_transition_probs = estimate_transition_probs + self._log_dir = log_dir + + def calculate_parameters(self) -> PairwiseAlignmentHMMParameters: + if self._estimate_transition_probs: + tau, mu = estimate_tau_mu(self._alignments) + else: + tau, mu = get_tau_mu_from_nerpa_cfg(self._nerpa_cfg) + omega_a, omega_b = get_alphabets_from_data(self._data, self._alignments) + p = self._estimate_p(omega_a, omega_b) + q_a, q_b = estimate_qa_qb(p) + p, q_a, q_b = array_to_dict(omega_a, omega_b, p, q_a, q_b) + res = PairwiseAlignmentHMMParameters(omega_a, omega_b, mu, tau, p, q_a, q_b) + if self._log_dir is not None: + res.log_to(os.path.join(self._log_dir, 'max_likelihood_parameters.txt')) + return res + + def _estimate_p(self, omega_a: List[Aminoacid], omega_b: List[ScoredAminoacid]) -> np.ndarray: + observations = [(alignment.aligned_sequence1.symbols[i], + alignment.aligned_sequence2.symbols[i]) + for alignment in self._alignments + for i in range(len(alignment.aligned_sequence1))] + # 1. Estimate g(a, mod, meth) = P(a, mod, meth | match) + # 2. Estimate f(a, mod1, meth1, mod2, meth2) = P(a, mod1, meth1, mod2, meth2 | mismatch) + f, g = {}, {} + for modification1 in ['@L', '@D']: + for methylation1 in [True, False]: + g[(modification1, methylation1)] = \ + estimate_g(omega_a, omega_b, observations, modification1, methylation1) + for modification2 in ['@L', '@D']: + for methylation2 in [True, False]: + f[(modification1, methylation1, modification2, methylation2)] = \ + estimate_f(omega_a, omega_b, observations, + modification1, methylation1, modification2, methylation2) + # 3. 
def estimate_tau_mu(alignments: List[PairwiseAlignmentOutputWithLogs]) -> Tuple[np.ndarray, np.ndarray]:
    """Estimate transition and start probabilities by counting over alignments.

    Each alignment column is mapped to a state index by
    ``_get_state_from_observation``. Counts start at one (add-one smoothing).

    Returns:
        ``(tau, mu)``: ``tau[i, j]`` is the probability of moving from state
        ``i`` to state ``j`` (every row sums to 1); ``mu[i]`` is the
        probability of starting in state ``i``.
    """
    tau = np.ones((3, 3))
    mu = np.ones((3,))

    for alignment in alignments:
        states: List[int] = [
            _get_state_from_observation(symb1, symb2)
            for symb1, symb2 in zip(alignment.aligned_sequence1.symbols,
                                    alignment.aligned_sequence2.symbols)
        ]
        if not states:
            # An empty alignment has no start state and no transitions.
            continue
        mu[states[0]] += 1
        for prev, cur in zip(states, states[1:]):
            tau[prev, cur] += 1

    # keepdims=True divides each row by its own sum. Without it the (3,) vector of
    # row sums broadcasts across columns and the rows do not sum to 1.
    tau = tau / tau.sum(axis=1, keepdims=True)
    mu = mu / mu.sum()
    return tau, mu
def parse_nerpa_config(cfg_path: str) -> Dict:
    """Read a Nerpa config file into a dictionary.

    The file is read by position: each of the first five lines is a label
    followed by whitespace-separated numbers. Lines one and two contribute
    their first number; lines three to five contribute all of their numbers.

    Returns:
        Dict with keys ``'insertion'`` and ``'deletion'`` (floats) and
        ``'Scores'``, ``'ProbGenCorrect'``, ``'ProbGenIncorrect'`` (float lists).

    Raises:
        IndexError: if the file has fewer than five lines or a scalar line has no value.
        ValueError: if a value is not a valid float.
    """
    with open(cfg_path, 'r') as f:
        # split() with no argument tolerates repeated spaces, tabs and trailing
        # newlines, so scalar lines are parsed as leniently as the list lines.
        rows = [line.split() for line in f]
    if len(rows) < 5:
        raise IndexError(f'{cfg_path}: expected at least 5 config lines, found {len(rows)}')

    def _values(row: List[str]) -> List[float]:
        """Numbers of a row, without its leading label."""
        return [float(token) for token in row[1:]]

    cfg = {'insertion': float(rows[0][1]), 'deletion': float(rows[1][1]),
           'Scores': _values(rows[2]),
           'ProbGenCorrect': _values(rows[3]),
           'ProbGenIncorrect': _values(rows[4])}
    return cfg
else: + a_mods_prob = defaultdict(int) + a_mods_prob[a.modification] = 1.0 + if b.modification is None: + b_mods_prob = mods_prob.copy() + else: + b_mods_prob = defaultdict(int) + b_mods_prob[b.modification] = 1.0 + f_ = sum(f[(mod_a, a.methylation, mod_b, b.methylation)][a.name] * a_mods_prob[mod_a] * b_mods_prob[mod_b] + for mod_a in ['@L', '@D'] for mod_b in ['@L', '@D']) + return f_ * get_prob_gen(nerpa_cfg, b.score, p_score, correct=True) + + +def _estimate_p_miss(a: Aminoacid, b: ScoredAminoacid, nerpa_cfg: Dict, g: Dict, + p_score: Dict, mods_prob: Dict) -> float: + if a.modification is not None: + g_a = g[(a.modification, a.methylation)][a.name] + else: + g_a = sum(g[(mod, a.methylation)][a.name] * mods_prob[mod] for mod in ['@L', '@D']) + if b.modification is not None: + g_b = g[(b.modification, b.methylation)][b.name] + else: + g_b = sum(g[(mod, b.methylation)][b.name] * mods_prob[mod] for mod in ['@L', '@D']) + return g_a * g_b * get_prob_gen(nerpa_cfg, b.score, p_score, correct=False) + + +def estimate_p_with_modifications(omega_a: List[Aminoacid], omega_b: List[ScoredAminoacid], + nerpa_cfg: Dict, f: Dict, g: Dict, p_score: Dict, mods_prob: Dict): + # Estimate p(a, b). 
+ # Model: + # p(@N-a, @M-a(score)) = f(a, N, M) * prob_gen_correct(score) * P(score) + # p(@N-a, @M-b(score)) = g(a, N) * g(b, M) * prob_gen_incorrect(score) * P(score) + p = np.zeros((len(omega_a), len(omega_b))) + for i, a in enumerate(omega_a): + for j, b in enumerate(omega_b): + if a.name == b.name: + p[i, j] = _estimate_p_match(a, b, nerpa_cfg, f, p_score, mods_prob) + else: + p[i, j] = _estimate_p_miss(a, b, nerpa_cfg, g, p_score, mods_prob) + p /= p.sum() + return p + + +def estimate_qa_qb(p: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + return p.sum(axis=1), p.sum(axis=0) + + +def array_to_dict(omega_a: AminoacidAlphabet, omega_b: ScoredAminoacidAlphabet, p: np.ndarray, q_a: np.ndarray, + q_b: np.ndarray) -> Tuple[Dict, Dict, Dict]: + p = {a: {b: p[i, j] for j, b in enumerate(omega_b)} for i, a in enumerate(omega_a)} + q_a = {a: q_a[i] for i, a in enumerate(omega_a)} + q_b = {b: q_b[i] for i, b in enumerate(omega_b)} + return p, q_a, q_b + + +def _truncate_score_to_nerpa_cfg(config: Dict, score: float): + for i in range(len(config['Scores'])): + if config['Scores'][i] <= score: + return config['Scores'][i] + + +def get_prob_gen(config: Dict, score: float, p_score: Dict, correct: bool): + score = _truncate_score_to_nerpa_cfg(config, score) + for i in range(len(config['Scores'])): + if config['Scores'][i] == score: + return np.exp(config['ProbGenCorrect' if correct else 'ProbGenIncorrect'][i]) * \ + p_score[_truncate_score_to_nerpa_cfg(config, score)] + + +def same_modifications(x: Symbol, mod: str, methylation: bool) -> bool: + return x.modification == mod and x.methylation == methylation + + +def get_names_alphabet(omega_a: AminoacidAlphabet, omega_b: ScoredAminoacidAlphabet) -> List[str]: + return list(set([a.name for a in omega_a] + [b.name for b in omega_b])) + + +def generate_p_score(config, data: TwoSequenceListsData) -> Dict[int, int]: + p_score = defaultdict(float) + div_term = 0 + for seq in data.sequences2: + for symb in seq.symbols: + score = 
int(_truncate_score_to_nerpa_cfg(config, symb.score)) + p_score[score] += 1 + div_term += 1 + return {score: p / div_term for score, p in p_score.items()} + + +def generate_p_mods(data: TwoSequenceListsData) -> Dict[str, int]: + p_mods = defaultdict(float) + div_term = 0 + for seq in data.sequences1: + for symb in seq.symbols: + if symb.modification is not None: + div_term += 1 + p_mods[symb.modification] += 1 + for seq in data.sequences2: + for symb in seq.symbols: + if symb.modification is not None: + div_term += 1 + p_mods[symb.modification] += 1 + return {mod: p / div_term for mod, p in p_mods.items()}