-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 788f80e
Showing
41 changed files
with
5,820 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
import json
import os
import subprocess
import sys
import tempfile
import uuid

import requests
|
||
# Defaults used by fetch_ensembl() for the species and for the
# coord_system_version parameter of the Ensembl sequence query.
SPECIES = 'homo_sapiens' | ||
ASSEMBLY = 'GRCh38' | ||
# Default number of bases added to / subtracted from the region boundaries by
# get_circ_coordinates() when building the two junction-flanking segments.
OFFSET = 200 | ||
|
||
def get_input():
    """Read and validate the three command-line arguments.

    Expects ``sys.argv[1]`` (coordinates in ``chrom:start-end`` format),
    ``sys.argv[2]`` (strand, ``'+'`` or ``'-'``) and ``sys.argv[3]`` (output
    file name prefix).

    Returns:
        A tuple ``(chrom, start, end, strand, outfile)``. ``chrom`` is
        upper-cased with every ``CHR`` occurrence removed; ``start`` and
        ``end`` are returned as the strings produced by ``parse_coords``.

    On a missing argument, unparsable coordinates or an invalid strand, a
    message is printed and the process exits with status 1.
    """
    try:
        # Trailing CR/LF can sneak in when arguments come from a file or a
        # Windows shell; strip both in one pass.
        coord = sys.argv[1].rstrip('\r\n')
        strand = sys.argv[2].rstrip('\r\n')
        outfile = sys.argv[3].rstrip('\r\n')
    except IndexError:
        print("USAGE:\npython circRNA.py coordinates strand outfile\n")
        print("\t'coordinates': chrom:start-end format (exp: 10:25000000-25001000)")
        print("\t'strand' : can be '+' or '-'")
        print("\t'outfile' : Give a name where the output will be saved in CSV format")
        sys.exit(1)
    try:
        chrom, start, end = parse_coords(coord)
        # Reject non-numeric positions here, with the friendly message,
        # instead of crashing later when they are converted with int().
        int(start)
        int(end)
    except (IndexError, ValueError):
        print("ERROR: Please provide coordinate in chrom:start-end format (exp: 10:25000000-25001000)")
        sys.exit(1)
    if strand not in ('+', '-'):
        print("ERROR: Strand should be either '+' or '-'")
        sys.exit(1)
    return chrom.upper().replace('CHR', ''), start, end, strand, outfile
|
||
def parse_coords(coord):
    """Split a ``chrom:start-end`` string into its three components.

    Args:
        coord: Region string such as ``"10:25000000-25001000"``.

    Returns:
        A tuple ``(chrom, start, end)`` of strings; ``start`` and ``end``
        contain only digits.

    Raises:
        ValueError: If the chromosome, the ``:`` or ``-`` separator is
            missing, or if start/end are not plain non-negative integers.
    """
    chrom, colon, span = coord.partition(":")
    start, dash, end = span.partition("-")
    well_formed = (
        chrom and colon and dash and start.isdigit() and end.isdigit()
    )
    if not well_formed:
        raise ValueError(
            "Coordinates must be in chrom:start-end format, got %r" % (coord,)
        )
    print("INFO: Input coordinates successfully parsed as following:")
    print("INFO: Chrom:%s Start:%s End:%s" % (chrom, start, end))
    return chrom, start, end
|
||
def get_circ_coordinates(start, end, strand, offset=OFFSET):
    """Compute the two genomic segments that flank the backsplice junction.

    Args:
        start: Region start (int or numeric string).
        end: Region end (int or numeric string).
        strand: ``"+"`` or ``"-"``.
        offset: Number of bases added to ``start`` / subtracted from ``end``
            to delimit each segment.

    Returns:
        ``((start1, end1), (start2, end2))`` as ints, in the order the two
        segments are later concatenated, or ``False`` when ``strand`` is
        neither ``"+"`` nor ``"-"``.
    """
    if strand not in ("+", "-"):
        return False
    region_start = int(start)
    region_end = int(end)
    near_start = (region_start, region_start + offset)
    near_end = (region_end - offset, region_end)
    if strand == "+":
        # Forward strand: the segment ending at `end` comes first, followed
        # by the segment beginning at `start`.
        segments = (near_end, near_start)
    else:
        # Reverse strand: the order is swapped; the sequences are requested
        # on the reverse strand by the caller.
        segments = (near_start, near_end)
    print("INFO: Calculation of backspliced coordinates with offset of %dbp complete" % offset)
    return segments
|
||
def fetch_ensembl(chrom, start, end, strand, species=SPECIES, assembly=ASSEMBLY,
                  timeout=30):
    """Fetch the sequence of a genomic region from the Ensembl REST API.

    Args:
        chrom: Chromosome / sequence region name.
        start: Region start coordinate.
        end: Region end coordinate.
        strand: ``'+'`` is sent as strand ``1``; anything else as ``-1``.
        species: Species name used in the request path.
        assembly: Value of the ``coord_system_version`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the script forever.

    Returns:
        The response body as text, with surrounding whitespace removed.

    Prints an error and exits with status 1 when the request fails or the
    server answers with a non-OK status.
    """
    print("INFO: Will connect to ENSEMBL for species: %s and assembly:%s" % (species, assembly))
    server = "https://rest.ensembl.org"
    strand_code = '1' if strand == '+' else '-1'
    ext = "/sequence/region/%s/%s:%s..%s:%s?coord_system_version=%s" % (
        species, chrom, str(start), str(end), strand_code, assembly)
    url = server + ext
    print("INFO: %s" % url)
    try:
        response = requests.get(
            url, headers={"Content-Type": "text/plain"}, timeout=timeout)
    except requests.exceptions.RequestException as err:
        print("ERROR: Unable to contact Ensembl to fetch sequences. Exiting now..")
        print("ERROR: %s" % err)
        sys.exit(1)
    if not response.ok:
        print("ERROR: Unable to contact Ensembl to fetch sequences. Exiting now..")
        print("ERROR: Ensembl answered with HTTP status %s" % response.status_code)
        sys.exit(1)
    print("INFO: Ensembl query successful")
    # The two fetched sequences are concatenated into a single Primer3
    # template line, so stray whitespace/newlines must not survive here.
    return response.text.strip()
|
||
def get_backspliced_seq(circ_cords, chrom=None, strand=None):
    """Fetch both junction-flanking segments and join them into one template.

    Args:
        circ_cords: ``((start1, end1), (start2, end2))`` as returned by
            ``get_circ_coordinates``.
        chrom: Chromosome name. When omitted, the module-level ``chrom`` set
            by the script entry point is used (the historical behaviour).
        strand: ``'+'`` or ``'-'``. When omitted, the module-level ``strand``
            is used.

    Returns:
        The first segment's sequence followed by the second segment's.

    Raises:
        ValueError: If ``chrom``/``strand`` are neither passed nor defined at
            module level (e.g. when the module is imported, not run).
    """
    module_scope = globals()
    if chrom is None:
        chrom = module_scope.get("chrom")
    if strand is None:
        strand = module_scope.get("strand")
    if chrom is None or strand is None:
        raise ValueError(
            "chrom and strand must be passed or defined at module level")
    (first_start, first_end), (second_start, second_end) = circ_cords
    print("INFO: Connecting to Ensembl for fetching 5' sequence")
    first_seq = fetch_ensembl(chrom, first_start, first_end, strand)
    print("INFO: Connecting to Ensembl for fetching 3' sequence")
    second_seq = fetch_ensembl(chrom, second_start, second_end, strand)
    # Strand is already accounted for: both sequences were requested on the
    # given strand, so they can be concatenated as-is.
    return first_seq + second_seq
|
||
def run_primer3(seq):
    """Run the bundled ``primer3_core`` on ``seq`` and return its raw output.

    A one-record Boulder-IO input file is written to a temporary file, passed
    to ``primer3_bin/primer3_core`` (located next to this script) together
    with ``primer3_bin/circrna_primers_settings.p3``, and removed afterwards
    even if Primer3 fails.

    Args:
        seq: Template sequence placed in ``SEQUENCE_TEMPLATE``.

    Returns:
        Everything Primer3 wrote to standard output, as text.

    Prints an error and exits with status 1 if the executable cannot be
    started.
    """
    print("INFO: Creating template file for Primer3 with template length of: %d" % len(seq))
    # delete=False: the file must outlive the handle so Primer3 can open it;
    # it is removed explicitly in the finally clause below.
    handle = tempfile.NamedTemporaryFile(mode='w', suffix='.p3in', delete=False)
    temp_fn = handle.name
    try:
        with handle:
            handle.write("SEQUENCE_ID=sequence\nSEQUENCE_TEMPLATE=%s\n=\n" % seq)
        script_dir = os.path.dirname(os.path.realpath(__file__))
        exe = "%s/primer3_bin/primer3_core" % script_dir
        settings_fn = "%s/primer3_bin/circrna_primers_settings.p3" % script_dir
        print("INFO: Using Primer3 settings file: %s" % settings_fn)
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through untouched.
        args = [exe, "-p3_settings_file=%s" % settings_fn,
                "-echo_settings_file", temp_fn]
        print("INFO: Running Primer3...")
        print("INFO:  %s" % " ".join(args))
        # NOTE(review): any relative path inside the settings file is resolved
        # against the current working directory, which is left unchanged here.
        try:
            proc = subprocess.Popen(
                args, stdout=subprocess.PIPE, universal_newlines=True)
        except OSError as err:
            print("ERROR: Unable to run Primer3 (%s): %s" % (exe, err))
            sys.exit(1)
        output = proc.communicate()[0]
    finally:
        os.remove(temp_fn)
        print("INFO: Removed temporary input file for Primer3")
    return output
|
||
def prep_output(raw_output, max_left_pos=195, min_right_pos=205):
    """Turn raw Primer3 ``KEY=VALUE`` output into a CSV table of primer pairs.

    Only pairs whose left primer position is below ``max_left_pos`` and whose
    right primer position is above ``min_right_pos`` are kept.

    Args:
        raw_output: Text printed by Primer3, one ``KEY=VALUE`` per line.
        max_left_pos: Exclusive upper bound for the left primer position.
        min_right_pos: Exclusive lower bound for the right primer position.

    Returns:
        CSV text (header line plus one line per kept pair), or the string
        ``"No primers found"`` when Primer3 returned no pairs or reported an
        error instead of a result.
    """
    fields = {}
    for line in raw_output.split('\n'):
        # partition keeps everything after the first '=' so values that
        # themselves contain '=' are not truncated.
        key, sep, value = line.rstrip('\r').partition('=')
        if sep:
            fields[key] = value

    if 'PRIMER_ERROR' in fields:
        print("WARNING: Primer3 reported an error: %s" % fields['PRIMER_ERROR'])
    # An error record carries no PRIMER_PAIR_NUM_RETURNED; treat it as zero
    # instead of crashing with KeyError.
    num_primers = int(fields.get('PRIMER_PAIR_NUM_RETURNED', 0))
    if num_primers <= 0:
        print("WARNING: No primers found")
        return "No primers found"

    rows = [','.join([
        "Primer ID", "Product size", "Left primer", "Right primer",
        "Left GC", "Right GC", "Left TM", "Right TM",
        "Left pos", "Right pos", "Left size", "Right size",
    ])]
    valid_primers = 0
    for i in range(num_primers):
        # PRIMER_LEFT_i / PRIMER_RIGHT_i hold "position,size".
        left = fields['PRIMER_LEFT_%d' % i].split(',')
        right = fields['PRIMER_RIGHT_%d' % i].split(',')
        left_pos, right_pos = left[0], right[0]
        if not (int(left_pos) < max_left_pos and int(right_pos) > min_right_pos):
            continue
        valid_primers += 1
        rows.append(",".join([
            str(i), fields['PRIMER_PAIR_%d_PRODUCT_SIZE' % i],
            fields['PRIMER_LEFT_%d_SEQUENCE' % i], fields['PRIMER_RIGHT_%d_SEQUENCE' % i],
            fields['PRIMER_LEFT_%d_GC_PERCENT' % i], fields['PRIMER_RIGHT_%d_GC_PERCENT' % i],
            fields['PRIMER_LEFT_%d_TM' % i], fields['PRIMER_RIGHT_%d_TM' % i],
            left_pos, right_pos,
            left[1], right[1],
        ]))
    print("INFO: %d primers found" % valid_primers)
    return '\n'.join(rows)
|
||
if __name__ == '__main__':
    # Script entry point: parse arguments, build the backsplice-junction
    # template from Ensembl, run Primer3 on it and write both the raw Primer3
    # output and the filtered primer table next to each other.
    print("Welcome to CircPrimer")
    # NOTE: chrom and strand must remain module-level names; the call to
    # get_backspliced_seq() below does not pass them and relies on finding
    # them in the module's global scope.
    chrom, start, end, strand, outfile = get_input()
    cc = get_circ_coordinates(start, end, strand)
    back_seq = get_backspliced_seq(cc)
    primer3_output = run_primer3(back_seq)
    # Raw Primer3 KEY=VALUE output, kept for troubleshooting.
    with open("%s_raw_primer3.csv" % outfile, 'w') as raw_out:
        raw_out.write(primer3_output)
    primers_table = prep_output(primer3_output)
    with open("%s.csv" % outfile, 'w') as table_out:
        table_out.write(primers_table)
    print("SUCCESS: Analysis complete.. Primers are saved in %s.csv" % outfile)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
Primer3 File - http://primer3.sourceforge.net | ||
P3_FILE_TYPE=settings | ||
|
||
SEQUENCE_TARGET=195,10 | ||
PRIMER_NUM_RETURN=20 | ||
PRIMER_TASK=generic | ||
PRIMER_PICK_LEFT_PRIMER=1 | ||
PRIMER_PICK_RIGHT_PRIMER=1 | ||
PRIMER_PRODUCT_SIZE_RANGE=100-400 | ||
PRIMER_OPT_SIZE=20 | ||
PRIMER_MIN_SIZE=18 | ||
PRIMER_MAX_SIZE=23 | ||
PRIMER_MIN_TM=57.0 | ||
PRIMER_OPT_TM=59.0 | ||
PRIMER_MAX_TM=63.0 | ||
PRIMER_PAIR_MAX_DIFF_TM=3.0 | ||
PRIMER_MIN_GC=30.0 | ||
PRIMER_MAX_GC=70.0 | ||
PRIMER_EXPLAIN_FLAG=1 | ||
PRIMER_THERMODYNAMIC_PARAMETERS_PATH=primer3_bin/primer3_config/ | ||
PRIMER_MIN_THREE_PRIME_DISTANCE=3 | ||
PRIMER_MAX_END_STABILITY=9.0 | ||
PRIMER_MAX_SELF_ANY_TH=45.00 | ||
PRIMER_MAX_SELF_END_TH=35.00 | ||
PRIMER_PAIR_MAX_COMPL_ANY_TH=45.00 | ||
PRIMER_PAIR_MAX_COMPL_END_TH=35.00 | ||
PRIMER_MAX_HAIRPIN_TH=24.00 | ||
PRIMER_MAX_TEMPLATE_MISPRIMING_TH=40.00 | ||
PRIMER_PAIR_MAX_TEMPLATE_MISPRIMING_TH=70.00 | ||
PRIMER_MAX_POLY_X=4 | ||
PRIMER_LIBERAL_BASE=1 | ||
PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=0 | ||
PRIMER_WT_POS_PENALTY=0.0 | ||
PRIMER_INTERNAL_SALT_DIVALENT=1.5 | ||
PRIMER_MAX_TEMPLATE_MISPRIMING=12.00 | ||
PRIMER_PAIR_MAX_TEMPLATE_MISPRIMING=24.00 | ||
= |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
-500 | ||
4700 | ||
-4100 | ||
-3800 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
-5900 | ||
-2600 | ||
-3200 | ||
-5200 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
-2100 | ||
-200 | ||
-3900 | ||
-4400 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
-700 | ||
4400 | ||
-1600 | ||
2900 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
-2900 | ||
-4100 | ||
-4200 | ||
-200 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
-3700 | ||
-4000 | ||
-3900 | ||
-4900 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
-6300 | ||
-4400 | ||
-5100 | ||
-4000 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
200 | ||
600 | ||
-1100 | ||
-6900 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 | ||
0 |
Oops, something went wrong.