push github

judithhariprakash · Feb 11, 2018 · 788f80e · 788f80e
commit 788f80e
Show file tree

Hide file tree

Showing 41 changed files with 5,820 additions and 0 deletions.
diff --git a/circRNA.py b/circRNA.py
@@ -0,0 +1,145 @@
+import sys
+import requests
+import os
+import uuid
+import json
+
+SPECIES = 'homo_sapiens'
+ASSEMBLY = 'GRCh38'
+OFFSET = 200
+
+def get_input():
+    try:
+        coord = sys.argv[1].rstrip('\n')
+        strand = sys.argv[2].rstrip('\n')
+        outfile = sys.argv[3].rstrip('\n')
+    except IndexError:
+        print "USAGE:\npython circRNA.py coordinates strand outfile\n"
+        print "\t'coordinates': chrom:start-end format (exp: 10:25000000-25001000)"
+        print "\t'strand'     : can be '+' or '-'"
+        print "\t'outfile'    : Give a name where the output will be saved in CSV format"
+        sys.exit()
+    coord = coord.rstrip('\n').rstrip('\r')
+    strand = strand.rstrip('\n').rstrip('\r')
+    try:
+        chrom, start, end = parse_coords(coord)
+    except:
+        print "ERROR: Please provide coordinate in chrom:start-end format (exp: 10:25000000-25001000)"
+        sys.exit()
+    if strand not in ['+', '-']:
+        print "ERROR: Strand should be either '+' or '-'"
+        sys.exit()
+    return chrom.upper().replace('CHR', ''), start, end, strand, outfile
+
+def parse_coords(coord):
+    chrom = coord.split(":")[0]
+    start = coord.split(":")[1].split("-")[0]
+    end = coord.split(":")[1].split("-")[1]
+    print "INFO: Input coordinates successfully parsed as following:"
+    print "INFO: Chrom:%s Start:%s End:%s" % (chrom, start, end)
+    return chrom, start, end
+
+def get_circ_coordinates(start, end, strand, offset=OFFSET):
+    # if its positive strand the offset is subtracted from end and add to the start sequence
+    if strand == "+":
+        start1 = int(end) - offset
+        end1 = int(end)
+        start2 = int(start)
+        end2 = int(start) + offset
+    # if its negative strand, reverse complement of the obtained sequence is joined together
+    elif strand == "-":
+        start1 = int(start)
+        end1 = int(start) + offset
+        start2 = int(end) - offset
+        end2 = int(end)
+    else:
+        return False
+    print "INFO: Calculation of backspliced coordinates with offset of %dbp complete" % offset
+    return ((start1, end1), (start2, end2))
+
+def fetch_ensembl(chrom, start, end, strand, species=SPECIES, assembly=ASSEMBLY):
+    print "INFO: Will connect to ENSEMBL for species: %s and assembly:%s" % (species, assembly)
+    server = "https://rest.ensembl.org"
+    s = '1' if strand == '+' else '-1'
+    ext = "/sequence/region/%s/%s:%s..%s:%s?coord_system_version=%s" % (
+        species, chrom, str(start), str(end), s, assembly)
+    print "INFO:", server+ext
+    r = requests.get(server+ext, headers={ "Content-Type" : "text/plain"})
+    if not r.ok:
+        print "ERROR: Unable to contact Ensembl to fetch sequences. Exiting now.."
+        sys.exit()
+    print "INFO: Ensembl query successful"
+    return r.text
+
+def get_backspliced_seq(circ_cords):
+    print "INFO: Connecting to Ensembl for fetching 5' sequence"
+    seq1 = fetch_ensembl(chrom, circ_cords[0][0], circ_cords[0][1], strand)
+    print "INFO: Connecting to Ensembl for fetching 3' sequence"
+    seq2 = fetch_ensembl(chrom, circ_cords[1][0], circ_cords[1][1], strand)
+    # Strand info is allow implicit in seq1 and seq2
+    return seq1+seq2
+
+def run_primer3(seq):
+    print "INFO: Creating template file for Primer3 with template length of: %d" % len(seq)
+    temp_fn = str(uuid.uuid4())
+    with open(temp_fn, 'w') as OUT:
+        OUT.write("SEQUENCE_ID=sequence\nSEQUENCE_TEMPLATE=%s\n=\n" % seq)
+    script_dir = os.path.dirname(os.path.realpath(__file__))
+    exe = "%s/primer3_bin/primer3_core" % script_dir
+    settings_fn = "%s/primer3_bin/circrna_primers_settings.p3" % script_dir
+    print "INFO: Using Primer3 settings file: %s" % settings_fn
+    cmd = "%s -p3_settings_file=%s -echo_settings_file %s" % (exe, settings_fn, temp_fn)
+    print "INFO: Running Primer3..."
+    print "INFO: ", cmd
+    output = os.popen(cmd).read()
+    os.system('rm %s' % temp_fn)
+    print "INFO: Removed temporary input file for Primer3"
+    return output
+
+def prep_output(raw_output):
+    output_dict = {}
+    for line in raw_output.split('\n'):
+        cols = line.rstrip('\n').split('=')
+        try:
+            output_dict[cols[0]] = cols[1]
+        except IndexError:
+            pass
+    num_primers =  int(output_dict['PRIMER_PAIR_NUM_RETURNED'])
+    valid_primers = 0
+    if num_primers > 0:
+        parsed = [','.join([
+            "Primer ID", "Product size", "Left primer", "Right primer",
+            "Left GC", "Right GC", "Left TM", "Right TM",
+            "Left pos", "Right pos", "Left size", "Right size"
+        ])]
+        for i in range(num_primers):
+            left_pos = output_dict['PRIMER_LEFT_%d' % i].split(',')[0]
+            right_pos = output_dict['PRIMER_RIGHT_%d' % i].split(',')[0]
+            if int(left_pos) < 195 and int(right_pos) > 205:
+                valid_primers += 1
+                parsed.append(",".join([
+                    str(i), output_dict['PRIMER_PAIR_%d_PRODUCT_SIZE' % i],
+                    output_dict['PRIMER_LEFT_%d_SEQUENCE' % i], output_dict['PRIMER_RIGHT_%d_SEQUENCE' % i],
+                    output_dict['PRIMER_LEFT_%d_GC_PERCENT' % i], output_dict['PRIMER_RIGHT_%d_GC_PERCENT' % i],
+                    output_dict['PRIMER_LEFT_%d_TM' % i], output_dict['PRIMER_RIGHT_%d_TM' % i],
+                    left_pos, right_pos,
+                    output_dict['PRIMER_LEFT_%d' % i].split(',')[1], output_dict['PRIMER_RIGHT_%d' % i].split(',')[1]
+                ]))
+    else:
+        print "WARNING: No primers found"
+        return "No primers found"
+    print "INFO: %d primers found" % valid_primers
+    return '\n'.join(parsed)
+
+if __name__ == '__main__':
+    print "Welcome to CircPrimer"    
+    chrom, start, end, strand, outfile = get_input()
+    cc = get_circ_coordinates(start, end, strand)
+    back_seq = get_backspliced_seq(cc)
+    primer3_output = run_primer3(back_seq)
+    with open("%s_raw_primer3.csv" % outfile, 'w') as OUT:
+        OUT.write(primer3_output)
+    primers_table = prep_output(primer3_output)
+    with open("%s.csv" % outfile, 'w') as OUT:
+        OUT.write(primers_table)
+    print "SUCCESS: Analysis complete.. Primers are saved in %s.csv" % outfile
diff --git a/primer3_bin/circrna_primers_settings.p3 b/primer3_bin/circrna_primers_settings.p3
@@ -0,0 +1,37 @@
+Primer3 File - http://primer3.sourceforge.net
+P3_FILE_TYPE=settings
+
+SEQUENCE_TARGET=195,10
+PRIMER_NUM_RETURN=20
+PRIMER_TASK=generic
+PRIMER_PICK_LEFT_PRIMER=1
+PRIMER_PICK_RIGHT_PRIMER=1
+PRIMER_PRODUCT_SIZE_RANGE=100-400
+PRIMER_OPT_SIZE=20
+PRIMER_MIN_SIZE=18
+PRIMER_MAX_SIZE=23
+PRIMER_MIN_TM=57.0
+PRIMER_OPT_TM=59.0
+PRIMER_MAX_TM=63.0
+PRIMER_PAIR_MAX_DIFF_TM=3.0
+PRIMER_MIN_GC=30.0
+PRIMER_MAX_GC=70.0
+PRIMER_EXPLAIN_FLAG=1
+PRIMER_THERMODYNAMIC_PARAMETERS_PATH=primer3_bin/primer3_config/
+PRIMER_MIN_THREE_PRIME_DISTANCE=3
+PRIMER_MAX_END_STABILITY=9.0
+PRIMER_MAX_SELF_ANY_TH=45.00
+PRIMER_MAX_SELF_END_TH=35.00
+PRIMER_PAIR_MAX_COMPL_ANY_TH=45.00
+PRIMER_PAIR_MAX_COMPL_END_TH=35.00
+PRIMER_MAX_HAIRPIN_TH=24.00
+PRIMER_MAX_TEMPLATE_MISPRIMING_TH=40.00
+PRIMER_PAIR_MAX_TEMPLATE_MISPRIMING_TH=70.00
+PRIMER_MAX_POLY_X=4
+PRIMER_LIBERAL_BASE=1
+PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=0
+PRIMER_WT_POS_PENALTY=0.0
+PRIMER_INTERNAL_SALT_DIVALENT=1.5
+PRIMER_MAX_TEMPLATE_MISPRIMING=12.00
+PRIMER_PAIR_MAX_TEMPLATE_MISPRIMING=24.00
+=
diff --git a/primer3_bin/long_seq_tm_test b/primer3_bin/long_seq_tm_test
diff --git a/primer3_bin/ntdpal b/primer3_bin/ntdpal
diff --git a/primer3_bin/ntthal b/primer3_bin/ntthal
diff --git a/primer3_bin/oligotm b/primer3_bin/oligotm
diff --git a/primer3_bin/primer3_config/dangle.dh b/primer3_bin/primer3_config/dangle.dh
@@ -0,0 +1,128 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+-500
+4700
+-4100
+-3800
+0
+0
+0
+0
+0
+0
+0
+0
+-5900
+-2600
+-3200
+-5200
+0
+0
+0
+0
+0
+0
+0
+0
+-2100
+-200
+-3900
+-4400
+0
+0
+0
+0
+0
+0
+0
+0
+-700
+4400
+-1600
+2900
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+-2900
+-4100
+-4200
+-200
+0
+0
+0
+0
+0
+0
+0
+0
+-3700
+-4000
+-3900
+-4900
+0
+0
+0
+0
+0
+0
+0
+0
+-6300
+-4400
+-5100
+-4000
+0
+0
+0
+0
+0
+0
+0
+0
+200
+600
+-1100
+-6900
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
-Original file line number
+Diff line change
@@ -0,0 +1,128 @@
+    -500
+    -4100
+    -3800
+    -5900
+    -2600
+    -3200
+    -5200
+    -2100
+    -200
+    -3900
+    -4400
+    -700
+    -1600
+    -2900
+    -4100
+    -4200
+    -200
+    -3700
+    -4000
+    -3900
+    -4900
+    -6300
+    -4400
+    -5100
+    -4000
+    -1100
+    -6900