Skip to content

Commit

Permalink
push github
Browse files Browse the repository at this point in the history
  • Loading branch information
judithhariprakash committed Feb 11, 2018
0 parents commit 788f80e
Show file tree
Hide file tree
Showing 41 changed files with 5,820 additions and 0 deletions.
145 changes: 145 additions & 0 deletions circRNA.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import json
import os
import subprocess
import sys
import tempfile
import uuid

import requests

# Species name inserted into the Ensembl REST sequence URL (default for fetch_ensembl).
SPECIES = 'homo_sapiens'
# Assembly sent to Ensembl as the coord_system_version query parameter (default for fetch_ensembl).
ASSEMBLY = 'GRCh38'
# Number of bases added to / subtracted from the region boundaries when
# get_circ_coordinates builds the two segments that are joined at the junction.
OFFSET = 200

def get_input():
    """Read and validate the command-line arguments.

    Expects ``sys.argv[1:4]`` to be ``coordinates strand outfile``.

    Returns:
        A tuple ``(chrom, start, end, strand, outfile)``. ``chrom`` is
        upper-cased with any ``CHR`` substring removed; ``start`` and ``end``
        are returned as the strings produced by ``parse_coords``.

    Exits:
        Prints a usage or error message and exits with status 1 when the
        arguments are missing, the coordinates are malformed or non-numeric,
        or the strand is not '+' or '-'.
    """
    try:
        coord, strand, outfile = sys.argv[1], sys.argv[2], sys.argv[3]
    except IndexError:
        print("USAGE:\npython circRNA.py coordinates strand outfile\n")
        print("\t'coordinates': chrom:start-end format (exp: 10:25000000-25001000)")
        print("\t'strand' : can be '+' or '-'")
        print("\t'outfile' : Give a name where the output will be saved in CSV format")
        sys.exit(1)

    # Drop stray line endings (e.g. arguments pasted from a Windows-formatted
    # file); outfile is cleaned as well so the file name never carries a '\r'.
    coord = coord.rstrip('\r\n')
    strand = strand.rstrip('\r\n')
    outfile = outfile.rstrip('\r\n')

    try:
        chrom, start, end = parse_coords(coord)
        # Fail here, with a helpful message, rather than later inside
        # get_circ_coordinates when the values are converted to int.
        int(start)
        int(end)
    except (IndexError, ValueError):
        print("ERROR: Please provide coordinate in chrom:start-end format (exp: 10:25000000-25001000)")
        sys.exit(1)

    if strand not in ('+', '-'):
        print("ERROR: Strand should be either '+' or '-'")
        sys.exit(1)

    return chrom.upper().replace('CHR', ''), start, end, strand, outfile

def parse_coords(coord):
    """Split a ``chrom:start-end`` string into its three components.

    Args:
        coord: Region string such as ``10:25000000-25001000``.

    Returns:
        ``(chrom, start, end)`` as strings, exactly as they appear in *coord*.

    Raises:
        IndexError: If the ``:`` or ``-`` separator is missing.
        ValueError: If the chromosome is empty or start/end are not integers.
    """
    fields = coord.split(":")
    chrom = fields[0]
    span = fields[1].split("-")
    start, end = span[0], span[1]

    if not chrom:
        raise ValueError("missing chromosome name in %r" % coord)
    # Validate only; the original string values are what callers receive.
    int(start)
    int(end)

    print("INFO: Input coordinates successfully parsed as following:")
    print("INFO: Chrom:%s Start:%s End:%s" % (chrom, start, end))
    return chrom, start, end

def get_circ_coordinates(start, end, strand, offset=OFFSET):
    """Compute the two genomic segments that flank the backsplice junction.

    Args:
        start: Region start (int or numeric string).
        end: Region end (int or numeric string).
        strand: '+' or '-'.
        offset: Number of bases each segment extends from its boundary.

    Returns:
        ``((start1, end1), (start2, end2))`` in the order the sequences are
        later concatenated, or ``False`` when *strand* is neither '+' nor '-'.
        No clamping is applied, so a segment may extend beyond the region when
        the region is shorter than *offset*.
    """
    if strand not in ("+", "-"):
        return False

    region_start = int(start)
    region_end = int(end)
    head = (region_start, region_start + offset)  # segment beginning at start
    tail = (region_end - offset, region_end)      # segment finishing at end

    # '+' strand: the segment ending at `end` comes first, followed by the
    # segment beginning at `start`. '-' strand: the order is swapped.
    ordered = (tail, head) if strand == "+" else (head, tail)

    print("INFO: Calculation of backspliced coordinates with offset of %dbp complete" % offset)
    return ordered

def fetch_ensembl(chrom, start, end, strand, species=SPECIES, assembly=ASSEMBLY, timeout=60):
    """Download the sequence of a genomic region from the Ensembl REST API.

    Args:
        chrom: Chromosome name as used in the region part of the URL.
        start: Region start.
        end: Region end.
        strand: '+' is sent as strand ``1``; anything else as ``-1``.
        species: Species path component of the request URL.
        assembly: Value of the ``coord_system_version`` query parameter.
        timeout: Seconds to wait for Ensembl before giving up, so an
            unresponsive server cannot hang the program indefinitely.

    Returns:
        The response body as text with surrounding whitespace removed.

    Exits:
        Prints an error and exits with status 1 when the request fails or
        the server answers with an error status.
    """
    print("INFO: Will connect to ENSEMBL for species: %s and assembly:%s" % (species, assembly))
    server = "https://rest.ensembl.org"
    orientation = '1' if strand == '+' else '-1'
    ext = "/sequence/region/%s/%s:%s..%s:%s?coord_system_version=%s" % (
        species, chrom, str(start), str(end), orientation, assembly)
    url = server + ext
    print("INFO: %s" % url)

    try:
        response = requests.get(url, headers={"Content-Type": "text/plain"}, timeout=timeout)
    except requests.exceptions.RequestException:
        # Connection problems and timeouts are reported like any other failure.
        response = None

    if response is None or not response.ok:
        print("ERROR: Unable to contact Ensembl to fetch sequences. Exiting now..")
        sys.exit(1)

    print("INFO: Ensembl query successful")
    # Strip defensively: the two fetched sequences are concatenated by the
    # caller, so a stray trailing newline would end up inside the template.
    return response.text.strip()

def get_backspliced_seq(circ_cords, chrom=None, strand=None):
    """Fetch both junction-flanking sequences and join them.

    Args:
        circ_cords: ``((start1, end1), (start2, end2))`` as returned by
            ``get_circ_coordinates``.
        chrom: Chromosome to query. When omitted, the module-level ``chrom``
            set by the command-line entry point is used.
        strand: '+' or '-'. When omitted, the module-level ``strand`` is used.

    Returns:
        The first segment's sequence followed by the second segment's.

    Raises:
        ValueError: If *circ_cords* is empty/False (``get_circ_coordinates``
            returns False for an invalid strand).
        KeyError: If *chrom* or *strand* is omitted and no module-level
            value exists.
    """
    if not circ_cords:
        raise ValueError("No backsplice coordinates supplied (invalid strand?)")

    # Backward compatibility: the script entry point calls this function with
    # the coordinates only and relies on its module-level chrom/strand.
    if chrom is None or strand is None:
        module_scope = globals()
        if chrom is None:
            chrom = module_scope['chrom']
        if strand is None:
            strand = module_scope['strand']

    (first_start, first_end), (second_start, second_end) = circ_cords

    print("INFO: Connecting to Ensembl for fetching 5' sequence")
    seq1 = fetch_ensembl(chrom, first_start, first_end, strand)
    print("INFO: Connecting to Ensembl for fetching 3' sequence")
    seq2 = fetch_ensembl(chrom, second_start, second_end, strand)
    # Strand is already accounted for: it was passed to Ensembl for both fetches.
    return seq1 + seq2

def run_primer3(seq):
    """Run the bundled primer3_core on *seq* and return its raw output.

    A temporary Primer3 input record is written, primer3_core is invoked with
    the settings file shipped next to this script, and the temporary file is
    removed afterwards, even when the run fails.

    Args:
        seq: Template sequence to design primers on.

    Returns:
        Everything primer3_core wrote to standard output, as text.

    Exits:
        Prints an error and exits with status 1 if the executable cannot be
        started.
    """
    print("INFO: Creating template file for Primer3 with template length of: %d" % len(seq))
    script_dir = os.path.dirname(os.path.realpath(__file__))
    exe = os.path.join(script_dir, "primer3_bin", "primer3_core")
    settings_fn = os.path.join(script_dir, "primer3_bin", "circrna_primers_settings.p3")

    # mkstemp gives an absolute path in the system temp directory, so the
    # input stays reachable when the child runs with a different cwd.
    fd, temp_fn = tempfile.mkstemp(prefix="primer3_", suffix=".txt")
    try:
        with os.fdopen(fd, 'w') as OUT:
            OUT.write("SEQUENCE_ID=sequence\nSEQUENCE_TEMPLATE=%s\n=\n" % seq)

        print("INFO: Using Primer3 settings file: %s" % settings_fn)
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed through untouched.
        cmd = [exe, "-p3_settings_file=%s" % settings_fn, "-echo_settings_file", temp_fn]
        print("INFO: Running Primer3...")
        print("INFO:  %s" % " ".join(cmd))
        try:
            # cwd=script_dir: the settings file points at the thermodynamic
            # parameters with a path relative to the script directory
            # (primer3_bin/primer3_config/).
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                    universal_newlines=True, cwd=script_dir)
        except OSError as err:
            print("ERROR: Unable to run Primer3 executable %s: %s" % (exe, err))
            sys.exit(1)
        output = proc.communicate()[0]
    finally:
        os.remove(temp_fn)
        print("INFO: Removed temporary input file for Primer3")
    return output

def prep_output(raw_output, junction=200, margin=5):
    """Convert raw Primer3 ``KEY=VALUE`` output into a CSV table.

    Only primer pairs that span the junction are kept: the left primer
    position must be below ``junction - margin`` and the right primer
    position above ``junction + margin``.

    Args:
        raw_output: Text produced by primer3_core.
        junction: Position of the junction in the template. The default of
            200 with ``margin=5`` reproduces the previously fixed limits of
            195 and 205.
        margin: Distance from the junction that both primers must keep.

    Returns:
        A header line followed by one comma-separated line per accepted
        primer pair, joined with newlines (no trailing newline), or the
        string ``"No primers found"`` when Primer3 returned no pairs or did
        not report a pair count at all.
    """
    records = {}
    for line in raw_output.split('\n'):
        # Split on the first '=' only so values containing '=' stay intact.
        key, sep, value = line.partition('=')
        if sep:
            records[key] = value

    if 'PRIMER_PAIR_NUM_RETURNED' in records:
        num_primers = int(records['PRIMER_PAIR_NUM_RETURNED'])
    else:
        # Primer3 did not report a pair count; surface its error message
        # when one is present and treat the run as having found nothing.
        if records.get('PRIMER_ERROR'):
            print("ERROR: Primer3 reported: %s" % records['PRIMER_ERROR'])
        num_primers = 0

    if num_primers <= 0:
        print("WARNING: No primers found")
        return "No primers found"

    left_limit = junction - margin
    right_limit = junction + margin
    rows = [','.join([
        "Primer ID", "Product size", "Left primer", "Right primer",
        "Left GC", "Right GC", "Left TM", "Right TM",
        "Left pos", "Right pos", "Left size", "Right size",
    ])]
    valid_primers = 0
    for i in range(num_primers):
        # PRIMER_LEFT_i / PRIMER_RIGHT_i hold "position,size".
        left = records['PRIMER_LEFT_%d' % i].split(',')
        right = records['PRIMER_RIGHT_%d' % i].split(',')
        if not (int(left[0]) < left_limit and int(right[0]) > right_limit):
            continue
        valid_primers += 1
        rows.append(','.join([
            str(i), records['PRIMER_PAIR_%d_PRODUCT_SIZE' % i],
            records['PRIMER_LEFT_%d_SEQUENCE' % i], records['PRIMER_RIGHT_%d_SEQUENCE' % i],
            records['PRIMER_LEFT_%d_GC_PERCENT' % i], records['PRIMER_RIGHT_%d_GC_PERCENT' % i],
            records['PRIMER_LEFT_%d_TM' % i], records['PRIMER_RIGHT_%d_TM' % i],
            left[0], right[0],
            left[1], right[1],
        ]))

    print("INFO: %d primers found" % valid_primers)
    return '\n'.join(rows)

if __name__ == '__main__':
    # Command-line pipeline: parse arguments, build the backsplice junction
    # template from Ensembl, design primers with Primer3, then save both the
    # raw Primer3 output and the filtered primer table.
    print("Welcome to CircPrimer")

    # chrom and strand must stay module-level names: get_backspliced_seq is
    # called with the coordinates only and picks them up from module scope.
    chrom, start, end, strand, outfile = get_input()
    junction_coords = get_circ_coordinates(start, end, strand)
    back_seq = get_backspliced_seq(junction_coords)

    primer3_output = run_primer3(back_seq)
    raw_path = "%s_raw_primer3.csv" % outfile
    with open(raw_path, 'w') as raw_handle:
        raw_handle.write(primer3_output)

    primers_table = prep_output(primer3_output)
    table_path = "%s.csv" % outfile
    with open(table_path, 'w') as table_handle:
        table_handle.write(primers_table)

    print("SUCCESS: Analysis complete.. Primers are saved in %s.csv" % outfile)
37 changes: 37 additions & 0 deletions primer3_bin/circrna_primers_settings.p3
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
Primer3 File - http://primer3.sourceforge.net
P3_FILE_TYPE=settings

SEQUENCE_TARGET=195,10
PRIMER_NUM_RETURN=20
PRIMER_TASK=generic
PRIMER_PICK_LEFT_PRIMER=1
PRIMER_PICK_RIGHT_PRIMER=1
PRIMER_PRODUCT_SIZE_RANGE=100-400
PRIMER_OPT_SIZE=20
PRIMER_MIN_SIZE=18
PRIMER_MAX_SIZE=23
PRIMER_MIN_TM=57.0
PRIMER_OPT_TM=59.0
PRIMER_MAX_TM=63.0
PRIMER_PAIR_MAX_DIFF_TM=3.0
PRIMER_MIN_GC=30.0
PRIMER_MAX_GC=70.0
PRIMER_EXPLAIN_FLAG=1
PRIMER_THERMODYNAMIC_PARAMETERS_PATH=primer3_bin/primer3_config/
PRIMER_MIN_THREE_PRIME_DISTANCE=3
PRIMER_MAX_END_STABILITY=9.0
PRIMER_MAX_SELF_ANY_TH=45.00
PRIMER_MAX_SELF_END_TH=35.00
PRIMER_PAIR_MAX_COMPL_ANY_TH=45.00
PRIMER_PAIR_MAX_COMPL_END_TH=35.00
PRIMER_MAX_HAIRPIN_TH=24.00
PRIMER_MAX_TEMPLATE_MISPRIMING_TH=40.00
PRIMER_PAIR_MAX_TEMPLATE_MISPRIMING_TH=70.00
PRIMER_MAX_POLY_X=4
PRIMER_LIBERAL_BASE=1
PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=0
PRIMER_WT_POS_PENALTY=0.0
PRIMER_INTERNAL_SALT_DIVALENT=1.5
PRIMER_MAX_TEMPLATE_MISPRIMING=12.00
PRIMER_PAIR_MAX_TEMPLATE_MISPRIMING=24.00
=
Binary file added primer3_bin/long_seq_tm_test
Binary file not shown.
Binary file added primer3_bin/ntdpal
Binary file not shown.
Binary file added primer3_bin/ntthal
Binary file not shown.
Binary file added primer3_bin/oligotm
Binary file not shown.
128 changes: 128 additions & 0 deletions primer3_bin/primer3_config/dangle.dh
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
0
0
0
0
0
0
0
0
0
0
0
0
-500
4700
-4100
-3800
0
0
0
0
0
0
0
0
-5900
-2600
-3200
-5200
0
0
0
0
0
0
0
0
-2100
-200
-3900
-4400
0
0
0
0
0
0
0
0
-700
4400
-1600
2900
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
-2900
-4100
-4200
-200
0
0
0
0
0
0
0
0
-3700
-4000
-3900
-4900
0
0
0
0
0
0
0
0
-6300
-4400
-5100
-4000
0
0
0
0
0
0
0
0
200
600
-1100
-6900
0
0
0
0
0
0
0
0
0
0
0
0
Loading

0 comments on commit 788f80e

Please sign in to comment.