oncokb
diff --git a/‎AnnotatorCore.py
+65-47 b/‎AnnotatorCore.py
+65-47
diff --git a/‎ClinicalDataAnnotator.py
+25-40 b/‎ClinicalDataAnnotator.py
+25-40
diff --git a/‎CnaAnnotator.py
+39-57 b/‎CnaAnnotator.py
+39-57
@@ -1,10 +1,8 @@
 #!/usr/bin/python
 
 import sys
-import getopt
 import csv
-import json
-import urllib
+import requests
 import os.path
 import re
 import matplotlib
@@ -14,12 +12,17 @@
 
 csv.field_size_limit(sys.maxsize) # for reading large files
 
-oncokbapiurl = "https://oncokb.org/api/v1"
+oncokbapiurl = "https://www.oncokb.org/api/v1"
+oncokbapibearertoken = ""
 
 def setoncokbbaseurl(u):
     global oncokbapiurl
     oncokbapiurl = u.rstrip('/') + '/api/v1'
 
+def setoncokbapitoken(t):  # store the OncoKB API bearer token later sent in the Authorization header
+    global oncokbapibearertoken
+    oncokbapibearertoken = t.strip()  # surrounding whitespace is removed before storing
+
 cancerhotspotsbaseurl = "http://www.cancerhotspots.org"
 def setcancerhotspotsbaseurl(u):
     global cancerhotspotsbaseurl
@@ -109,17 +112,23 @@ def generateReadme(outfile):
     outf.close()
 
 def gethotspots(url, type):
-    hotspotsjson = json.load(urllib.urlopen(url))
+    """Map each gene to the set of hotspot residue positions fetched from `url`.
+
+    Records whose 'type' differs from `type` are skipped unless `type` is None.
+    A non-200 response is reported on stdout and leaves the mapping empty.
+    """
     hotspots = {}
-    for hs in hotspotsjson:
-        gene = hs['hugoSymbol']
-        start = hs['aminoAcidPosition']['start']
-        end = hs['aminoAcidPosition']['end']
-        if type is None or hs['type'] == type:
-            if gene not in hotspots:
-                hotspots[gene] = set()
-            for i in range(start, end + 1):
-                hotspots[gene].add(i)
+    response = requests.get(url)
+    if response.status_code != 200:
+        print "error when processing %s" % url
+        print "reason: %s" % response.reason
+        return hotspots
+    for record in response.json():
+        gene = record['hugoSymbol']
+        first = record['aminoAcidPosition']['start']
+        last = record['aminoAcidPosition']['end']
+        if type is None or record['type'] == type:
+            hotspots.setdefault(gene, set()).update(range(first, last + 1))
     return hotspots
 
 missensesinglehotspots = None
@@ -920,40 +929,49 @@ def pulloncokb(key, url):
         oncokbdata['oncogenic'] = ""
 
         try:
-            evidences = json.load(urllib.urlopen(url))
-            # if not evidences['geneExist'] or (not evidences['variantExist'] and not evidences['alleleExist']):
-            #     return ''
-
-            # mutation effect
-            if (evidences['mutationEffect'] is not None):
-                oncokbdata['mutation_effect'] = evidences['mutationEffect']['knownEffect']
-                oncokbdata['citations'] = appendoncokbcitations(oncokbdata['citations'],
-                                                                evidences['mutationEffect']['citations']['pmids'],
-                                                                evidences['mutationEffect']['citations']['abstracts'])
-
-            # oncogenic
-            oncokbdata['oncogenic'] = evidences['oncogenic']
-
-            # get treatment
-            for treatment in evidences['treatments']:
-                level = treatment['level']
-
-                if level not in levels:
-                    print level + " is ignored"
-                    # oncokbdata[level].append('')
-                else:
-                    drugs = treatment['drugs']
-
-                    oncokbdata['citations'] = appendoncokbcitations(oncokbdata['citations'], treatment['pmids'],
-                                                                    treatment['abstracts'])
-
-                    if len(drugs) == 0:
-                        oncokbdata[level].append('[NOT SPECIFIED]')
+            headers = {
+                'Content-Type': 'application/json',
+                'Authorization': 'Bearer %s' % oncokbapibearertoken
+            }
+            response = requests.get(url, headers=headers)
+            if response.status_code == 200:
+                evidences = response.json()
+                # if not evidences['geneExist'] or (not evidences['variantExist'] and not evidences['alleleExist']):
+                #     return ''
+
+                # mutation effect
+                if (evidences['mutationEffect'] is not None):
+                    oncokbdata['mutation_effect'] = evidences['mutationEffect']['knownEffect']
+                    oncokbdata['citations'] = appendoncokbcitations(oncokbdata['citations'],
+                                                                    evidences['mutationEffect']['citations']['pmids'],
+                                                                    evidences['mutationEffect']['citations']['abstracts'])
+
+                # oncogenic
+                oncokbdata['oncogenic'] = evidences['oncogenic']
+
+                # get treatment
+                for treatment in evidences['treatments']:
+                    level = treatment['level']
+
+                    if level not in levels:
+                        print level + " is ignored"
+                        # oncokbdata[level].append('')
                     else:
-                        drugnames = []
-                        for drug in drugs:
-                            drugnames.append(drug['drugName'])
-                        oncokbdata[level].append('+'.join(drugnames))
+                        drugs = treatment['drugs']
+
+                        oncokbdata['citations'] = appendoncokbcitations(oncokbdata['citations'], treatment['pmids'],
+                                                                        treatment['abstracts'])
+
+                        if len(drugs) == 0:
+                            oncokbdata[level].append('[NOT SPECIFIED]')
+                        else:
+                            drugnames = []
+                            for drug in drugs:
+                                drugnames.append(drug['drugName'])
+                            oncokbdata[level].append('+'.join(drugnames))
+            else:
+                print "error when processing %s" % url
+                print "reason: %s" % response.reason
         except:
             print "error when processing " + url
             # sys.exit()
 
@@ -1,53 +1,38 @@
 #!/usr/bin/python
 
 import sys
-import getopt
+import argparse
 from AnnotatorCore import *
 
-def main(argv):
-
-    inputclinicalfile = ''
-    outputclinicalfile = ''
-    annotatedalterationfiles = []
-
-    try:
-        opts, args = getopt.getopt(argv, "hi:o:a:s:")
-    except getopt.GetoptError:
-        print 'for help: python ClinicalDataAnnotator.py -h'
-        sys.exit(2)
 
-    for opt, arg in opts:
-        if opt == '-h':
-            print 'ClinicalDataAnnotator.py -i <input clinical file> -o <output clinical file> -a <annotated alteration files, separate by ,> [-s sample list filter]'
-            print '  Essential clinical columns:'
-            print '    SAMPLE_ID: sample ID'
-            sys.exit()
-        elif opt in ("-i"):
-            inputclinicalfile = arg
-        elif opt in ("-o"):
-            outputclinicalfile = arg
-        elif opt in ("-a"):
-            annotatedalterationfiles = arg.split(',')
-        elif opt in ("-s"):
-            setsampleidsfileterfile(arg)
-
-    if inputclinicalfile == '' or outputclinicalfile=='' or len(annotatedalterationfiles)==0:
+def main(argv):
+    """Annotate argv.input_file into argv.output_file; blank -a entries are dropped so a missing -a is rejected."""
+    if argv.help:
+        print 'ClinicalDataAnnotator.py -i <input clinical file> -o <output clinical file> -a <annotated alteration files, separate by ,> [-s sample list filter]'
+        print '  Essential clinical columns:'
+        print '    SAMPLE_ID: sample ID'
+        sys.exit()
+    if argv.sample_ids_filter:
+        setsampleidsfileterfile(argv.sample_ids_filter)
+    annotated_alteration_files = [f.strip() for f in argv.annotated_alteration_files.split(',') if f.strip()]  # '' -> []
+    if argv.input_file == '' or argv.output_file == '' or len(annotated_alteration_files) == 0:
         print 'for help: python ClinicalDataAnnotator.py -h'
         sys.exit(2)
 
-    print 'annotating '+inputclinicalfile+"..."
-
-    processclinicaldata(annotatedalterationfiles, inputclinicalfile, outputclinicalfile)
+    print 'annotating %s ...' % argv.input_file
+    processclinicaldata(annotated_alteration_files, argv.input_file, argv.output_file)
 
     print 'done!'
 
+
 if __name__ == "__main__":
-    # argv = [
-    #     '-i', 'data/example_clinical.txt',
-    #     '-o', 'data/example_clinical.oncokb.txt',
-    #     '-a', 'data/example_maf.oncokb.txt,data/example_cna.oncokb.txt'
-    # ]
-    # main(argv)
-
-    # print sys.argv[1:]
-    main(sys.argv[1:])
+    # -h is declared by hand (add_help=False) so that main() prints its own usage text.
+    parser = argparse.ArgumentParser(add_help=False)
+    parser.add_argument('-h', dest='help', action="store_true", default=False)
+    # Every remaining option is an optional string that defaults to ''.
+    for flag, dest in (('-i', 'input_file'), ('-o', 'output_file'),
+                       ('-s', 'sample_ids_filter'), ('-a', 'annotated_alteration_files')):
+        parser.add_argument(flag, dest=dest, default='', type=str)
+    parser.set_defaults(func=main)
+    args = parser.parse_args()
+    args.func(args)
@@ -1,72 +1,54 @@
 #!/usr/bin/python
 
 import sys
-import getopt
+import argparse
 from AnnotatorCore import *
 
-def main(argv):
-
-    baseurl = 'http://oncokb.org'
-    inputcnafile = ''
-    inputclinicalfile = ''
-    outputcnafile = ''
-    previousresultfile = ''
-    defaultcancertype = 'cancer'
 
-    try:
-        opts, args = getopt.getopt(argv, "hi:o:p:c:s:d:t:u:")
-    except getopt.GetoptError:
+def main(argv):
+    """Annotate the CNA file named by -i using the parsed options in `argv`; -i, -o and -b are required."""
+    if argv.help:
+        print 'CnaAnnotator.py -i <input CNA file> -o <output CNA file> -b <oncokb api bearer token> [-p previous results] [-c <input clinical file>] [-s sample list filter] [-t <default tumor type>] [-u oncokb-base-url]'
+        print '  Input CNA file should follow the GISTIC output (https://cbioportal.readthedocs.io/en/latest/File-Formats.html#discrete-copy-number-data)'
+        print '  Essential clinical columns:'
+        print '    SAMPLE_ID: sample ID'
+        print '  Cancer type will be assigned based on the following priority:'
+        print '     1) ONCOTREE_CODE in clinical data file'
+        print '     2) ONCOTREE_CODE exist in MAF'
+        print '     3) default tumor type (-t)'
+        print '  Default OncoKB base url is https://www.oncokb.org'
+        sys.exit()
+    if argv.input_file == '' or argv.output_file == '' or argv.oncokb_api_bearer_token == '':
         print 'for help: python CnaAnnotator.py -h'
         sys.exit(2)
-
-    for opt, arg in opts:
-        if opt == '-h':
-            print 'CnaAnnotator.py -i <input cNA file> -o <output MAF file> [-p previous results] [-c <input clinical file>] [-s sample list filter] [-t <default tumor type>] [-u base-url]'
-            print '  Input CNA file should follow the GISTIC output (https://cbioportal.readthedocs.io/en/latest/File-Formats.html#discrete-copy-number-data)'
-            print '  Essential clinical columns:'
-            print '    SAMPLE_ID: sample ID'
-            print '  Cancer type will be assigned based on the following priority:'
-            print '     1) ONCOTREE_CODE in clinical data file'
-            print '     2) ONCOTREE_CODE exist in MAF'
-            print '     3) default tumor type (-t)'
-            print '  Default OncoKB base url is http://oncokb.org'
-            sys.exit()
-        elif opt in ("-i"):
-            inputcnafile = arg
-        elif opt in ("-o"):
-            outputcnafile = arg
-        elif opt in ("-p"):
-            previousresultfile = arg
-        elif opt in ("-c"):
-            inputclinicalfile = arg
-        elif opt in ("-s"):
-            setsampleidsfileterfile(arg)
-        elif opt in ("-t"):
-            defaultcancertype = arg
-        elif opt in ("-u"):
-            setoncokbbaseurl(arg)
-
-    if inputcnafile == '' or outputcnafile=='':
-        print 'for help: python MafAnnotator.py -h'
-        sys.exit(2)
+    if argv.sample_ids_filter:
+        setsampleidsfileterfile(argv.sample_ids_filter)
+    if argv.oncokb_api_url:
+        setoncokbbaseurl(argv.oncokb_api_url)
+    setoncokbapitoken(argv.oncokb_api_bearer_token)
 
     cancertypemap = {}
-    if inputclinicalfile != '':
-        readCancerTypes(inputclinicalfile, cancertypemap)
-
-    print 'annotating '+inputcnafile+"..."
+    if argv.input_clinical_file:
+        readCancerTypes(argv.input_clinical_file, cancertypemap)
 
-    processcnagisticdata(inputcnafile, outputcnafile, previousresultfile, defaultcancertype, cancertypemap, False)
+    print 'annotating %s ...' % argv.input_file
+    processcnagisticdata(argv.input_file, argv.output_file, argv.previous_result_file, argv.default_cancer_type, cancertypemap, False)
 
     print 'done!'
 
-if __name__ == "__main__":
-    # argv = [
-    #     '-i', 'data/example_cna.txt',
-    #     '-o', 'data/example_cna.oncokb.txt',
-    #     '-c', 'data/example_clinical.txt',
-    # ]
-    # main(argv)
 
-    # print sys.argv[1:]
-    main(sys.argv[1:])
+if __name__ == "__main__":
+    # -h is declared by hand (add_help=False) so that main() prints its own usage text.
+    parser = argparse.ArgumentParser(add_help=False)
+    parser.add_argument('-h', dest='help', action="store_true", default=False)
+    string_options = (
+        ('-i', 'input_file', ''), ('-o', 'output_file', ''),
+        ('-p', 'previous_result_file', ''), ('-c', 'input_clinical_file', ''),
+        ('-s', 'sample_ids_filter', ''), ('-t', 'default_cancer_type', 'cancer'),
+        ('-u', 'oncokb_api_url', ''), ('-b', 'oncokb_api_bearer_token', ''),
+    )
+    for flag, dest, default in string_options:
+        parser.add_argument(flag, dest=dest, default=default, type=str)
+    parser.set_defaults(func=main)
+    args = parser.parse_args()
+    args.func(args)