EBIvariation · tskir · Jan 31, 2020 · Jan 28, 2020 · Jan 28, 2020 · Jan 28, 2020
diff --git a/bin/trait_mapping/create_efo_table.py b/bin/trait_mapping/create_efo_table.py
@@ -52,7 +52,12 @@ def uri_to_curie(uri):
 def get_cross_references(curie):
     """Queries OxO to return the list of cross-references for a given term curie."""
     url = oxo_url_template.format(curie=curie)
-    mappings = requests.get(url).json()['_embedded']['searchResults'][0]['mappingResponseList']
+    response = requests.get(url).json()
+    if '_embedded' not in response:
+        print('Warning: OxO error for term {}. No cross-links will be available for this term. '
+              'See https://github.com/EBISPOT/OXO/issues/26'.format(curie))
+        return []
+    mappings = response['_embedded']['searchResults'][0]['mappingResponseList']
     return [m['curie'] for m in mappings]
 
 

diff --git a/docs/build.md b/docs/build.md
@@ -102,22 +102,5 @@ questions about local OLS installation.
    * To install to develop: `python3 setup.py develop`
    * To build a source distribution: `python3 setup.py sdist`
 
-## Regenerating test data
-All the test does (for the moment) is checking that parsing 10 records from the XML will (1) not crash and (2)
-provide 10 records after parsing. So to regenerate test data, we just have to extract any 10 records (can just be the
-first 10 records) from the ClinVar XML file:
-
-```bash
-CLINVAR_RELEASE="2019-01"  # set the correct one
-zcat ClinVarFullRelease_${CLINVAR_RELEASE}.xml.gz \
-  | awk 'BEGIN {RS="</ClinVarSet>\n\n"; ORS=RS} {print} NR==10 {exit}' \
-  > ClinvarExample.xml
-echo "</ReleaseSet>" >> ClinvarExample.xml
-gzip -c <ClinvarExample.xml >ClinvarExample.xml.gz
-```
-
-Eyeball input & output files to ensure that the ClinVar format has not changed sufficiently enough to render this
-snippet invalid. Then put the generated files into `clinvar-xml-parser/src/test/resources/` directory.
-
 ## Tests
 You can run all tests with: `python setup.py test`
diff --git a/docs/manual-curation.md b/docs/manual-curation.md
@@ -88,16 +88,12 @@ The “Status” column has the following acceptable values:
 Sometimes, especially when copy-pasting information from external sources, a mapping label or URL can contain an additional space symbol (at the beginning or end) or an accidental line break. This causes problems in the downstream processing, so such characters must be removed manually. To minimise the occurrences of this, the Google Sheets template includes a validation formula for the first two columns (“URI of selected mapping” and “Label of selected mapping”). If it detects an extra space symbol or a line break, the cell will be highlighted in red.
 
 ## Exporting curation results
-Once the manual curation is complete, export the results to a file named `finished_mappings_curation.tsv` and save it to `${BATCH_ROOT}/trait_mapping` directory. This file must consist of three columns from the curation spreadsheet: “ClinVar label”; “URI of selected mapping”; “Label of selected mapping”, in that order. Make sure to only export the mappings which the curator marked as done.
-
-Sometimes “Mapping to use” column may contain newline characters inserted by accident; if present, remove them using a global regexp search in Google Sheets.
-
-After that, two files with mappings must be concatenated to a single file to be used as input for the evidence string generation:
-* `automated_trait_mappings.tsv`
-  + Mappings generated automatically by the trait mapping pipeline and already considered “finished”
-* `finished_mappings_curation.tsv`
-  + Eyeballed good quality mappings
-  + Manually curated medium and low quality mappings
-  + New mappings for previously unmapped traits
-
-The resulting file must be named `trait_names_to_ontology_mappings.tsv` and saved to `${BATCH_ROOT}/trait_mapping` directory as well.
+Once the manual curation is completed, apply a spreadsheet filter so that only traits with Status = DONE are visible. Copy data for all non-empty rows from three columns: “ClinVar label”; “URI of selected mapping”; “Label of selected mapping”, in that order. **Do not include header lines.** Save the data to a file `${BATCH_ROOT}/trait_mapping/finished_mappings_curation.tsv`.
+
+Concatenate automated and manual mappings into a single file:
+```bash
+cat \
+  ${BATCH_ROOT}/trait_mapping/automated_trait_mappings.tsv \
+  ${BATCH_ROOT}/trait_mapping/finished_mappings_curation.tsv \
+> ${BATCH_ROOT}/trait_mapping/trait_names_to_ontology_mappings.tsv
+```
diff --git a/docs/submit-opentargets-batch.md b/docs/submit-opentargets-batch.md
diff --git a/eva_cttv_pipeline/evidence_string_generation/resources/CTTVGeneticsEvidenceString.json b/eva_cttv_pipeline/evidence_string_generation/resources/CTTVGeneticsEvidenceString.json
@@ -6,7 +6,7 @@
     "type": null,
     "id": []
   },
-  "validated_against_schema_version": "1.6.2",
+  "validated_against_schema_version": "1.6.3",
   "disease": {
     "id": []
   },

diff --git a/eva_cttv_pipeline/evidence_string_generation/resources/CTTVSomaticEvidenceString.json b/eva_cttv_pipeline/evidence_string_generation/resources/CTTVSomaticEvidenceString.json
@@ -2,7 +2,7 @@
   "type": "somatic_mutation",
   "access_level": "public",
   "sourceID": "eva_somatic",
-  "validated_against_schema_version": "1.6.2",
+  "validated_against_schema_version": "1.6.3",
   "disease": {
     "id": []
   },

diff --git a/tests/evidence_string_generation/resources/opentargets.1.6.2.json.gz b/tests/evidence_string_generation/resources/opentargets.1.6.2.json.gz
diff --git a/tests/evidence_string_generation/resources/opentargets.1.6.3.json.gz b/tests/evidence_string_generation/resources/opentargets.1.6.3.json.gz
diff --git a/tests/evidence_string_generation/test_evidence_strings.py b/tests/evidence_string_generation/test_evidence_strings.py
@@ -60,7 +60,7 @@ def test_evidence_string(self):
             {"lit_id": "http://europepmc.org/abstract/MED/21697857"}]
     },
     "disease": {"id": ["http://www.orpha.net/ORDO/Orphanet_886"]},
-    "validated_against_schema_version": "1.6.2",
+    "validated_against_schema_version": "1.6.3",
     "target": {
         "target_type": "http://identifiers.org/cttv.target/gene_variant",
         "id": "http://identifiers.org/ensembl/ENSG00000163646",
@@ -182,7 +182,7 @@ def test_evidence_string(self):
         test_dict = {
     "literature": {"references": [{"lit_id": "http://europepmc.org/abstract/MED/8281160"}]},
     "disease": {"id": ["http://www.ebi.ac.uk/efo/EFO_0000232"]},
-    "validated_against_schema_version": "1.6.2",
+    "validated_against_schema_version": "1.6.3",
     "target": {
         "target_type": "http://identifiers.org/cttv.target/gene_variant",
         "id": "http://identifiers.org/ensembl/ENSG00000134982",
@@ -291,7 +291,7 @@ def setUp(self):
         self.test_args = get_args_CTTVGeneticsEvidenceString_init()
         self.test_ges = evidence_strings.CTTVGeneticsEvidenceString(*self.test_args)
         ot_schema_path = os.path.join(
-            os.path.dirname(__file__), 'resources', 'opentargets.1.6.2.json.gz')
+            os.path.dirname(__file__), 'resources', 'opentargets.1.6.3.json.gz')
         self.ot_schema_contents = json.loads(gzip.open(ot_schema_path).read().decode('utf-8'))
 
     # CTTVEvidenceString tests
@@ -457,7 +457,7 @@ def setUp(self):
         test_args = get_args_CTTVSomaticEvidenceString_init()
         self.test_ses = evidence_strings.CTTVSomaticEvidenceString(*test_args)
         ot_schema_path = os.path.join(
-            os.path.dirname(__file__), 'resources', 'opentargets.1.6.2.json.gz')
+            os.path.dirname(__file__), 'resources', 'opentargets.1.6.3.json.gz')
         self.ot_schema_contents = json.loads(gzip.open(ot_schema_path).read().decode('utf-8'))
 
     def test_db_xref_url(self):