import json

# Edge predicates that attach a child term to its parent:
# BFO_0000050 (the OBO "part of" relation) and plain "is_a".
_CHILD_PREDICATES = ("http://purl.obolibrary.org/obo/BFO_0000050", "is_a")
# Term that the tree builder always leaves out (reason not recorded in the code).
_SKIPPED_TERM = "http://purl.obolibrary.org/obo/MONDO_0011876"


def _index_children(data):
    """Map every term id to the ids of its direct children, in edge order."""
    children_of = {}
    for edge in data["graphs"][0]["edges"]:
        if edge.get("pred") in _CHILD_PREDICATES:
            children_of.setdefault(edge.get("obj"), []).append(edge.get("sub"))
    return children_of


def _attach_children(term_id, term_label, d, nodes_dict, children_of):
    """Recursively hang the labelled descendants of *term_id* under d[term_label]."""
    if term_label not in d:
        d[term_label] = {}  # add the parent to the dictionary

    for child_id in children_of.get(term_id, ()):
        if child_id == _SKIPPED_TERM:
            continue
        child_label = nodes_dict.get(child_id)
        if child_label is None:
            continue  # unlabelled nodes are not shown in the tree
        if child_label in d:
            # The child was already placed next to its parent: move that
            # subtree one level down instead of duplicating it.
            d[term_label][child_label] = d[child_label]
            del d[child_label]
        else:
            d[term_label][child_label] = {}
        # NOTE(review): the recursion is assumed to run for both branches
        # above; the nesting could not be confirmed from the original text.
        _attach_children(child_id, child_label, d[term_label], nodes_dict, children_of)


def get_children(ontology, term_id, term_label, d, nodes_dict=None, data=None):
    """Build the nested {label: {child label: ...}} tree below one ontology term.

    Args:
        ontology: Path to an OBO-graphs JSON file (``graphs[0].nodes`` /
            ``graphs[0].edges``). Only read when *nodes_dict* is None.
        term_id: Identifier of the root term.
        term_label: Label used as the key of the root term in *d*.
        d: Dictionary that receives the tree; it is modified in place.
        nodes_dict: Optional pre-built ``{node id: label}`` mapping.
        data: Optional pre-loaded ontology JSON; required when *nodes_dict*
            is supplied.

    Returns:
        *d* with ``d[term_label]`` filled in, or the string
        ``"<term_id> node not in ontology"`` when the term is unknown (kept
        for backward compatibility; callers should check for it).
    """
    if nodes_dict is None:  # load the json file only once
        # Explicit encoding: the locale default is not UTF-8 on every platform.
        with open(ontology, encoding="utf-8") as f:
            data = json.load(f)
        nodes_dict = {
            node["id"]: node["lbl"]
            for node in data["graphs"][0]["nodes"]
            if "id" in node and "lbl" in node
        }

    if term_id not in nodes_dict:
        return f"{term_id} node not in ontology"  # node not found, return early

    # Index the edges once; scanning the whole edge list for every visited
    # term is quadratic on an ontology the size of EFO.
    _attach_children(term_id, term_label, d, nodes_dict, _index_children(data))
    return d


def flatten(d):
    """Return every key of a nested dict depth-first; non-dict values are kept too."""
    items = []
    for key, value in d.items():
        items.append(key)
        if isinstance(value, dict):
            items.extend(flatten(value))
        else:
            items.append(value)
    return items


def transform_nested_dict_to_tree(d, parent_label=None, parent_value=None):
    """Convert a nested label dict into a list of label/value/children tree nodes.

    Each node is ``{"label": key, "value": path}`` where *path* is the chain
    of ancestor labels joined with ``" , "``; a ``"children"`` list is added
    only when the key has children. *parent_value* is accepted for
    compatibility and is not used.
    """
    result = []
    for key, value in d.items():
        label = f"{parent_label} , {key}" if parent_label else key
        node = {"label": key, "value": label}
        children = transform_nested_dict_to_tree(value, label, key) if value else []
        if children:
            node["children"] = children
        result.append(node)
    return result
"cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "#load pickle into dict\n", + "with open('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\onto_dicts\\\\organismpart_dict.pickle', 'rb') as handle:\n", + " orgpart_dict = pickle.load(handle)\n", + "# make streamlit tree\n", + "organismpart_nodes = transform_nested_dict_to_tree(orgpart_dict['organism part'])\n", + "with open('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\onto_dicts\\\\organismpart_nodes.pickle', 'wb') as handle:\n", + " pickle.dump(organismpart_nodes, handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + "# flatten into a list of elements\n", + "all_organismpart_elements = flatten(orgpart_dict['organism part'])\n", + "with open('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\onto_dicts\\\\all_organismpart_elements.pickle', 'wb') as handle:\n", + " pickle.dump(all_organismpart_elements, handle, protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "with open('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\onto_dicts\\\\all_organismpart_elements.pickle', 'rb') as handle:\n", + " x = pickle.load(handle)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Stored all_organism_part_dict_elements as gzipped json'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# make streamlit tree\n", + "orgpart_nodes = transform_nested_dict_to_tree(orgpart_dict)\n", + "all_orgpart_elements = flatten(orgpart_dict)\n", + "store_as_gzipped_json(orgpart_dict, \"organism_part_dict\")\n", + "store_as_gzipped_json(orgpart_nodes, \"organism_part_dict_nodes\")\n", + "store_as_gzipped_json(all_orgpart_elements, \"all_organism_part_dict_elements\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "26865" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(all_organismpart_elements)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### for cell types" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "d=dict()\n", + "celltype_dict = get_children('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\ontology\\\\efo.json', \"http://www.ebi.ac.uk/efo/EFO_0000324\",'cell type', d)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "celltype_dict = celltype_dict['cell type']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "celltype_dict['Not available'] = {}" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['epithelial cell', 'embryonic cell (metazoa)', 'photosynthetic cell', 'mouse erythroleukemia cell', 'clear cell', 'somatic cell', 'fungal cell', 'prokaryotic cell', 'immortal cell line cell', 'neoplastic cell', 'integumental cell', 'diploid cell', 'lung cancer cell', 'follicular dendritic cell', 'mantle cell', 'glial brain cell', 'merkel cell', 'mouse neural progenitor cell', 'electrically active cell', 'ligament cell', 'bone marrow cell', 'disease cell type', 'inferred cell type', 'plant cell', 'secretory cell', 'stem cell', 'pancreatic cell', 'nervous system cell', 'infected cell', 'non-terminally differentiated cell', 'experimental cell', 'musculo-skeletal system cell', 'reproductive system cell', 'Not available'])" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "celltype_dict.keys()" + ] + }, + { + 
"cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Stored all_celltype_elements as gzipped json'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "celltype_nodes = transform_nested_dict_to_tree(celltype_dict)\n", + "all_celltype_elements = flatten(celltype_dict)\n", + "store_as_gzipped_json(celltype_dict, \"cell_type_dict\")\n", + "store_as_gzipped_json(celltype_nodes, \"cell_type_nodes\")\n", + "store_as_gzipped_json(all_celltype_elements, \"all_cell_type_elements\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### for disease 'characteristics[disease]',==> ontology, EFO:0000408" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "d=dict()\n", + "disease_dict = get_children('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\ontology\\\\efo.json', \"http://www.ebi.ac.uk/efo/EFO_0000408\",'disease', d)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "43" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(disease_dict['disease'].keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "import gzip" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "disease_dict['normal'] = {}\n", + "disease_dict['Not available'] = {}" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['disease', 'normal', 'Not available'])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "disease_dict.keys()" + ] + }, + { + 
"cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# make streamlit tree\n", + "disease_nodes = transform_nested_dict_to_tree(disease_dict)\n", + "all_disease_elements = flatten(disease_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Stored all_disease_elements as gzipped json'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "store_as_gzipped_json(disease_dict, \"disease_dict\")\n", + "store_as_gzipped_json(disease_nodes, \"disease_nodes\")\n", + "store_as_gzipped_json(all_disease_elements, \"all_disease_elements\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### for developmental stage" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "d=dict()\n", + "develop_dict = get_children('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\ontology\\\\efo.json', \"http://www.ebi.ac.uk/efo/EFO_0000399\",'developmental stage', d)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "develop_dict = develop_dict['developmental stage']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "develop_dict['Not available'] = {}" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Stored all_developmental_stage_elements as gzipped json'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "developmental_stage_nodes = transform_nested_dict_to_tree(develop_dict)\n", + "all_developmental_stage_elements = flatten(develop_dict)\n", + "store_as_gzipped_json(develop_dict, \"developmental_stage_dict\")\n", + 
"store_as_gzipped_json(developmental_stage_nodes, \"developmental_stage_nodes\")\n", + "store_as_gzipped_json(all_developmental_stage_elements, \"all_developmental_stage_elements\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## for organism" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "d=dict()\n", + "org_dict = get_children('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\ontology\\\\efo.json', \"http://purl.obolibrary.org/obo/OBI_0100026\",'organism', d)\n", + "org_dict = org_dict['organism']\n", + "org_dict['Not available'] = {}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Stored all_organism_elements as gzipped json'" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# make streamlit tree\n", + "org_nodes = transform_nested_dict_to_tree(org_dict)\n", + "all_org_elements = flatten(org_dict)\n", + "store_as_gzipped_json(org_dict, \"organism_dict\")\n", + "store_as_gzipped_json(org_nodes, \"organism_nodes\")\n", + "store_as_gzipped_json(all_org_elements, \"all_organism_elements\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# save the dictionary as a pickle\n", + "\n", + "with open('org_dict.pickle', 'wb') as handle:\n", + " pickle.dump(org_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## for ancestry" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d=dict()\n", + "ancestry_dict = 
# Notebook cells that export the ancestry, cell line and enrichment subtrees
# of EFO. They rely on get_children / flatten / transform_nested_dict_to_tree,
# store_as_gzipped_json and pickle, all brought into scope by earlier cells.
EFO_JSON = 'C:\\Users\\tinec\\OneDrive - UGent\\git\\SDRF_GUI\\ontology\\efo.json'
ONTO_DICTS = 'C:\\Users\\tinec\\OneDrive - UGent\\git\\SDRF_GUI\\onto_dicts\\'

# ## for ancestry
d = dict()
ancestry_dict = get_children(EFO_JSON, "http://purl.obolibrary.org/obo/HANCESTRO_0004", 'ancestry category', d)
len(ancestry_dict['ancestry category'].keys())

# save the dictionary as a pickle
with open('ancestry_dict.pickle', 'wb') as handle:
    pickle.dump(ancestry_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

ancestry_nodes = transform_nested_dict_to_tree(ancestry_dict['ancestry category'])

with open('ancestry_nodes.pickle', 'wb') as handle:
    pickle.dump(ancestry_nodes, handle, protocol=pickle.HIGHEST_PROTOCOL)

all_ancestry_elements = flatten(ancestry_dict['ancestry category'])
with open('all_ancestry_elements.pickle', 'wb') as handle:
    pickle.dump(all_ancestry_elements, handle, protocol=pickle.HIGHEST_PROTOCOL)

# load pickle into dict
# NOTE: pickle.load runs code embedded in the file; only load pickles that
# were written by this notebook.
with open(ONTO_DICTS + 'ancestry_dict.pickle', 'rb') as handle:
    ancestry_dict = pickle.load(handle)
# make streamlit tree
ancestry_nodes = transform_nested_dict_to_tree(ancestry_dict)
with open(ONTO_DICTS + 'ancestry_nodes.pickle', 'wb') as handle:
    pickle.dump(ancestry_nodes, handle, protocol=pickle.HIGHEST_PROTOCOL)
# flatten into a list of elements
all_ancestry_elements = flatten(ancestry_dict)
with open(ONTO_DICTS + 'all_ancestry_elements.pickle', 'wb') as handle:
    # Fixed: this used to dump all_celltype_elements (left over from the
    # cell type section), so the ancestry file held cell type labels.
    pickle.dump(all_ancestry_elements, handle, protocol=pickle.HIGHEST_PROTOCOL)

# ## for cell line
d = dict()
cell_dict = get_children(EFO_JSON, "http://purl.obolibrary.org/obo/CL_0000000", 'cell', d)

cell_dict = cell_dict['cell']
cell_dict['Not available'] = {}

# make streamlit tree
cell_nodes = transform_nested_dict_to_tree(cell_dict)
all_cell_elements = flatten(cell_dict)
store_as_gzipped_json(cell_dict, "cell_line_dict")
store_as_gzipped_json(cell_nodes, "cell_line_nodes")
store_as_gzipped_json(all_cell_elements, "all_cell_line_elements")

# ## for enrichment
d = dict()
enrichment_dict = get_children(EFO_JSON, "http://www.ebi.ac.uk/efo/EFO_0009090", 'enrichment process', d)
enrichment_dict['Not available'] = {}

# make streamlit tree
enrichment_nodes = transform_nested_dict_to_tree(enrichment_dict)
all_enrichment_elements = flatten(enrichment_dict)
store_as_gzipped_json(enrichment_dict, "enrichment_dict")
store_as_gzipped_json(enrichment_nodes, "enrichment_nodes")
store_as_gzipped_json(all_enrichment_elements, "all_enrichment_elements")

enrichment_dict['enrichment process']['sample enrichment'].keys()

# save dict as pickle
with open(ONTO_DICTS + 'enrichment_dict.pickle', 'wb') as handle:
    pickle.dump(enrichment_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
# make streamlit tree
with open(ONTO_DICTS + 'enrichment_nodes.pickle', 'wb') as handle:
    pickle.dump(enrichment_nodes, handle, protocol=pickle.HIGHEST_PROTOCOL)
# flatten into a list of elements
with open(ONTO_DICTS + 'all_enrichment_elements.pickle', 'wb') as handle:
    pickle.dump(all_enrichment_elements, handle, protocol=pickle.HIGHEST_PROTOCOL)
"""Landing page of the SDRF annotation tool (Streamlit).

Loads the ontology lookups into the session, lets the user resume from a
previously downloaded SDRF file, or start a fresh template for a species.
"""
import gzip
import json
import os
import re
import warnings

import numpy as np
import pandas as pd
import streamlit as st

import ParsingModule

warnings.filterwarnings("ignore")

local_dir = os.path.dirname(__file__)

# Upper bound on the number of rows/raw files the tool accepts.
MAX_SAMPLES = 500
# Data files whose name matches one of these are not loaded on this page.
_EXCLUDED_DATA_FILES = re.compile(
    r"archaea|bacteria|eukaryota|virus|unclassified|other sequences"
)

st.set_page_config(
    page_title="SDRF annotation tool",
    layout="wide",
    page_icon="🧪",
    menu_items={
        "Get help": "https://github.ugent.be/tineclae/SDRF_GUI",
        "Report a bug": "https://github.ugent.be/tineclae/SDRF_GUI",
    },
)


# use streamlit cache data to load gzipped jsons files from folder
@st.cache_data
def load_data():
    """Load every ``*.json.gz`` lookup in ``data/`` plus the Unimod table.

    Returns:
        A tuple ``(data, unimod)``: ``data`` maps each file name (without the
        ``.json.gz`` suffix) to its decoded JSON content, ``unimod`` is the
        tab-separated ``ontology/unimod.csv`` as a DataFrame.
    """
    local_dir = os.path.dirname(__file__)
    folder_path = os.path.join(local_dir, "data")
    unimod_path = os.path.join(local_dir, "ontology", "unimod.csv")
    data = {}
    for filename in os.listdir(folder_path):
        if _EXCLUDED_DATA_FILES.search(filename):
            continue
        file_path = os.path.join(folder_path, filename)
        if not filename.endswith(".json.gz"):
            st.write(f"Skipping file {file_path}: not a gzipped file")
            continue
        try:
            with gzip.open(file_path, "rb") as f:
                file_data = json.load(f)
        except gzip.BadGzipFile:
            st.write(f"Error reading file {file_path}: not a gzipped file")
        else:
            data[filename.replace(".json.gz", "")] = file_data

    unimod = pd.read_csv(unimod_path, sep="\t")
    return data, unimod


data_dict, unimod = load_data()
if data_dict:
    st.success("*Data was loaded*", icon="✅")
else:
    st.error("Failed loading data", icon="❌")
if "data_dict" not in st.session_state:
    st.session_state["data_dict"] = data_dict
if "unimod" not in st.session_state:
    st.session_state["unimod"] = unimod


st.title("Welcome to the SDRF annotation tool")
st.markdown(
    """The Sample and Data Relationship Format will allow your data to reach its full potential.
By having all the metadata available and machine readable, your data will be able to reach its full impact by being studied by other researchers. """
)
st.markdown(
    """ This tool will help you to annotate your data with the correct metadata in several steps for a maximum of 500 samples.
- Step 1: select a default SDRF file to start from based on the species of your sample
- Step 2: if you have a local metadata file, you can upload it to map to the SDRF file
- Step 3: add labelling information
- Step 4: fill in required and additional columns using an ontology-based input system \n
For atypical experiment types, you can check community suggested columns in *5. Experiment types*.
"""
)

st.markdown("""You are able to download the intermediate file at any given timepoint, so you can come back to the other steps whenever is suitable for you.
If this is the case right now, please upload your intermediate SDRF file here:""")

upload_df = st.file_uploader(
    "Upload intermediate SDRF file", type=["tsv"], accept_multiple_files=False, help='Upload a previously saved SDRF file. It should be in tsv format and should not contain more than 500 samples'
)
if upload_df is not None:
    template_df = pd.read_csv(upload_df, sep='\t')
    if template_df.shape[0] > MAX_SAMPLES:
        st.error('Too many samples, please upload a maximum of 500 samples')
    else:
        st.write(template_df)
        st.session_state["template_df"] = template_df

st.markdown("""In need of some inspiration? Download this example SDRF file to get an idea of the required output""")
# The button needs the file *content*: passing the path string would make the
# browser download a text file that only contains that path.
example_path = os.path.join(local_dir, "example_SDRF.tsv")
if os.path.isfile(example_path):
    with open(example_path, "rb") as example_file:
        st.download_button("Download example SDRF", data=example_file.read(), file_name="example.sdrf.tsv")
else:
    st.warning("The example SDRF file is not available in this installation")


st.header("If you're starting from scratch, start here")
st.subheader("Select your species")
species = ["", "human", "cell-line", "default", "nonvertebrates", "plants", "vertebrates"]
selected_species = st.selectbox("""Select a species for the SDRF template which will contain the basic colummns to fill in for this specific species.
If your species is not in the drop down list, you can always use the default template.""",
species, help="This species selection will impact the default columns present in your SDRF template. You can always add more columns in step *Additional columns*.")

if selected_species != "":
    folder_path = os.path.join(local_dir, "templates")
    # Load the corresponding CSV file based on the selected species
    template_df = pd.read_csv(
        f"{folder_path}/sdrf-{selected_species}.tsv",
        sep="\t",
    )
    template_df["comment[modification parameters]"] = np.nan
    template_df["comment[fragment mass tolerance]"] = np.nan
    template_df["comment[precursor mass tolerance]"] = np.nan

    # Ask user to upload filenames of their samples
    uploaded_names = st.text_input("Input raw file names as a comma separated list", help="The raw file names will be input in the comment[data file] column and are the basis of your SDRF file. Input maximum 500 raw files")
    # text_input returns "" (never None) while the box is empty, so drop empty
    # entries instead of storing a bogus blank file name. Names are trimmed
    # but spaces inside a file name are preserved.
    filenames = [name.strip() for name in (uploaded_names or "").split(",") if name.strip()]
    if len(filenames) > MAX_SAMPLES:
        st.error('Too many samples, please upload a maximum of 500 samples')
    elif filenames:
        st.write(f"Added filenames: {filenames}")
        ## Store filenames in the dataframe
        template_df["comment[data file]"] = filenames
        st.session_state["template_df"] = template_df

    ## Show the data in a table
    st.write(template_df)
    if "template_df" not in st.session_state:
        st.session_state["template_df"] = template_df
    with st.sidebar:
        download = st.download_button("Press to download SDRF file", ParsingModule.convert_df(template_df), "intermediate_SDRF.sdrf.tsv", help="download your SDRF file")
# Folder the Streamlit app reads its gzipped lookups from (author's machine).
DEFAULT_DATA_DIR = "C:\\Users\\tinec\\OneDrive - UGent\\git\\SDRF_GUI\\data"


def _collect_obo_subclasses(onto, obo_id, obo_label, d, obo_elements):
    """Fill *d* with the subclass tree of *obo_id* and record labels in *obo_elements*."""
    subclasses = list(onto[obo_id].subclasses(distance=1))
    if len(subclasses) <= 1:
        return {}  # leaf term: nothing below it
    d[obo_label] = {}
    # The first entry is skipped; presumably it is the term itself, which
    # pronto yields first when with_self is left at its default (confirm).
    for child in subclasses[1:]:
        obo_elements.add(child.name)
        d[child.name] = _collect_obo_subclasses(
            onto, child.id, child.name, defaultdict(dict), obo_elements
        )
    return remove_duplicate_values(d)


def get_obo_subclasses_and_elements(onto, obo_id, obo_label, obo_elements, d=None, distance=1):
    """Get the nested subclass tree of a term together with all labels seen.

    :param onto: ontology object indexable by term id
    :param obo_id: obo_id to search for
    :param obo_label: label of that term
    :param obo_elements: set that receives every label; modified in place
    :param d: optional dictionary to fill (a new defaultdict when None)
    :param distance: accepted for compatibility; only direct subclasses are
        queried at each level, deeper levels are reached by recursion
    :return: tuple ``(tree, obo_elements)``; the tree holds ``obo_label: {}``
        next to one entry per direct subclass, each mapping to its own subtree
    """
    obo_elements.add(obo_label)
    if d is None:
        d = defaultdict(dict)
    # Previously this recursed into get_obo_subclasses with one positional
    # argument too many, which raised TypeError for any term with children.
    d = _collect_obo_subclasses(onto, obo_id, obo_label, d, obo_elements)
    return d, obo_elements


def get_obo_subclasses(onto, obo_id, obo_label, d=None, distance=1):
    """Get the nested subclass tree of a given obo_id.

    :param onto: ontology object indexable by term id
    :param obo_id: obo_id to search for
    :param obo_label: label of that term
    :param d: optional dictionary to fill (a new defaultdict when None)
    :param distance: accepted for compatibility; see
        get_obo_subclasses_and_elements
    :return: nested dictionary of subclasses, ``{}`` for a term without any
    """
    if d is None:
        d = defaultdict(dict)
    return _collect_obo_subclasses(onto, obo_id, obo_label, d, set())


def remove_duplicate_values(d):
    """Drop, in place, any nested entry that repeats its own parent key; return *d*."""
    for key, value in d.items():
        if isinstance(value, dict):
            remove_duplicate_values(value)
            value.pop(key, None)
    return d


def store_as_gzipped_json(data, filename, data_dir=DEFAULT_DATA_DIR):
    """Store *data* as ``<data_dir>/<filename>.json.gz`` and return a status message.

    :param data: JSON-serialisable object
    :param filename: file name without the ``.json.gz`` suffix
    :param data_dir: target folder, defaults to the project's data folder
    """
    import os

    path = os.path.join(data_dir, filename + ".json.gz")
    with gzip.open(path, 'wt', encoding="utf-8") as f:
        json.dump(data, f)
    return f"Stored {filename} as gzipped json"


def open_gzipped_json(filename, data_dir=DEFAULT_DATA_DIR):
    """Load and return the content of ``<data_dir>/<filename>.json.gz``.

    :param filename: file name without the ``.json.gz`` suffix
    :param data_dir: folder to read from, defaults to the project's data folder
    """
    import os

    # Joined with a separator: the old concatenation produced "...datafoo.json.gz".
    path = os.path.join(data_dir, filename + ".json.gz")
    with gzip.open(path, 'rt', encoding="utf-8") as f:
        return json.load(f)
\n", + "'comment[instrument]',==>ontology MS:1000031
\n", + "comment[dissociation method] MS:1000044" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:289: SyntaxWarning: contains text but no `xsd:datatype`\n", + " meta.annotations.add(self._extract_literal_pv(child))\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown 
element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:113: SyntaxWarning: unknown element in `owl:ObjectProperty`: \n", + " self._extract_object_property(prop, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\ontology.py:283: NotImplementedWarning: cannot process plain `owl:AnnotationProperty`\n", + " cls(self).parse_from(_handle) # type: ignore\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}BFO_0000179\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}BFO_0000180\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000112\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000116\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000600\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000601\n", + 
" self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000602\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000118\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000111\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: several names found for 'CL:0000000', using 'cell'\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: cannot process `rdfs:subClassOf` in this context\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000117\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000119\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000232\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in 
`owl:Class`: {http://www.w3.org/2002/07/owl#}equivalentClass\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: could not extract target IRI from `owl:Restriction`\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://www.w3.org/2000/01/rdf-schema#}seeAlso\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}STATO_0000032\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}STATO_0000404\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}STATO_0000041\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.org/dc/elements/1.1/}source\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}IAO_0000233\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://purl.obolibrary.org/obo/}STATO_0000391\n", + " self._extract_term(class_, curies)\n", + 
"c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\parsers\\rdfxml.py:117: SyntaxWarning: unknown element in `owl:Class`: {http://usefulinc.com/ns/doap#}bug-database\n", + " self._extract_term(class_, curies)\n", + "c:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\ontology.py:283: SyntaxWarning: unknown axiom property: 'http://www.w3.org/2000/01/rdf-schema#subClassOf'\n", + " cls(self).parse_from(_handle) # type: ignore\n" + ] + } + ], + "source": [ + "ms = Ontology(\"C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\ontology\\\\psi-ms.obo\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleavage agent details: a list, no substructure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cleavage_list= []\n", + "i = list(ms['MS:1001045'].subclasses())\n", + "for _ in i:\n", + " cleavage_list.append(_.name)\n", + "cleavage_list = cleavage_list[1:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cleavage_agent_dict = get_obo_subclasses(ms, 'MS:1001045', 'cleavage agent name')\n", + "cleavage_agent_dict.pop('cleavage agent name')\n", + "cleavage_agent_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cleavage_agent_nodes = transform_nested_dict_to_tree(cleavage_agent_dict)\n", + "all_cleavage_agent_elements = flatten(cleavage_agent_dict)\n", + "store_as_gzipped_json(cleavage_agent_dict, \"cleavage_agent_dict\")\n", + "store_as_gzipped_json(cleavage_agent_nodes, \"cleavage_agent_nodes\")\n", + "store_as_gzipped_json(all_cleavage_agent_elements, \"all_cleavage_agent_elements\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "store_as_gzipped_json(cleavage_list, 'cleavage_list')" + ] + }, + { + 
"attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instrument model. Substructure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "instrument_dict = get_obo_subclasses(ms, 'MS:1000031', 'instrument model', distance=1)\n", + "instrument_dict.pop('instrument model')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "instrument_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "instrument_nodes = transform_nested_dict_to_tree(instrument_dict)\n", + "all_instrument_elements = flatten(instrument_dict)\n", + "store_as_gzipped_json(instrument_dict, \"instrument_dict\")\n", + "store_as_gzipped_json(instrument_nodes, \"instrument_nodes\")\n", + "store_as_gzipped_json(all_instrument_elements, \"all_instrument_elements\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dissociation method:MS:1000044" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dissociation_dict = get_obo_subclasses(ms, 'MS:1000044', 'dissociation method', distance=1)\n", + "dissociation_dict.pop('dissociation method')" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "collections.defaultdict" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dissociation_dict['collision-induced dissociation']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dissociation_nodes = transform_nested_dict_to_tree(dissociation_dict)\n", + 
"all_dissociation_elements = flatten(dissociation_dict)\n", + "store_as_gzipped_json(dissociation_dict, \"dissociation_dict\")\n", + "store_as_gzipped_json(dissociation_nodes, \"dissociation_nodes\")\n", + "store_as_gzipped_json(all_dissociation_elements, \"all_dissociation_elements\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PRIDE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# a datetime error appeared, we don't need the creation time so let's remove it\n", + "file1 = open('pride_cv.obo',\n", + " 'r')\n", + " \n", + "# defining object file2 to\n", + "# open GeeksforGeeksUpdated file\n", + "# in write mode\n", + "file2 = open('pride_cv_updated.obo',\n", + " 'w')\n", + " \n", + "# reading each line from original\n", + "# text file\n", + "for line in file1.readlines():\n", + " \n", + " # reading all lines that do not\n", + " # begin with \"TextGenerator\"\n", + " if not (line.startswith('creat')):\n", + " \n", + " # storing only those lines that\n", + " # do not begin with \"TextGenerator\"\n", + " file2.write(line)\n", + " \n", + "# close and save the files\n", + "file2.close()\n", + "file1.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pride = Ontology(\"C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\ontology\\\\pride_cv_updated.obo\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "if fractionated: comment[ fractionation method]PRIDE:0000550
\n", + "'comment[label]', ==> label free, TMT channelsPRIDE:0000514" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fractionation method " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fractionation_dict = get_obo_subclasses(pride, 'PRIDE:0000550', 'Fractionation method', distance=1)\n", + "fractionation_dict\n", + "# PRIDE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fractionation_dict.pop('Fractionation method')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fractionation_nodes = transform_nested_dict_to_tree(fractionation_dict)\n", + "all_fractionation_elements = flatten(fractionation_dict)\n", + "store_as_gzipped_json(fractionation_dict, \"fractionation_dict\")\n", + "store_as_gzipped_json(fractionation_nodes, \"fractionation_nodes\")\n", + "store_as_gzipped_json(all_fractionation_elements, \"all_fractionation_elements\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Label" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "label_dict = get_obo_subclasses(pride, 'PRIDE:0000514', 'Label', distance=1)\n", + "label_dict\n", + "# PRIDE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "label_dict.pop('Label')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "label_dict[\"Stable isotope dimethyl labeling\"] = {}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "label_nodes = transform_nested_dict_to_tree(label_dict)\n", + "all_label_elements = flatten(label_dict)\n", + 
"store_as_gzipped_json(label_dict, \"label_dict\")\n", + "store_as_gzipped_json(label_nodes, \"label_nodes\")\n", + "store_as_gzipped_json(all_label_elements, \"all_label_elements\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "reduction reagent \n", + "alkylation reagent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "reduction_dict = get_obo_subclasses(pride, 'PRIDE:0000607', 'reduction reagent', distance=1)\n", + "reduction_dict.pop(\"reduction reagent\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "reduction_nodes = transform_nested_dict_to_tree(reduction_dict)\n", + "all_reduction_elements = flatten(reduction_dict)\n", + "store_as_gzipped_json(reduction_dict, \"reduction_dict\")\n", + "store_as_gzipped_json(reduction_nodes, \"reduction_nodes\")\n", + "store_as_gzipped_json(all_reduction_elements, \"all_reduction_elements\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "alkylation_dict = get_obo_subclasses(pride, 'PRIDE:0000598', 'alkylation reagent', distance=1)\n", + "alkylation_dict.pop(\"alkylation reagent\")\n", + "alkylation_nodes = transform_nested_dict_to_tree(alkylation_dict)\n", + "all_alkylation_elements = flatten(alkylation_dict)\n", + "store_as_gzipped_json(alkylation_dict, \"alkylation_dict\")\n", + "store_as_gzipped_json(alkylation_nodes, \"alkylation_nodes\")\n", + "store_as_gzipped_json(all_alkylation_elements, \"all_alkylation_elements\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NCBITaxon" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ncbi = 
Ontology(\"C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\ontology\\\\ncbitaxon.obo\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# find all end leafs in the obo tree and store them in a list\n", + "def get_obo_leafs(obo, root, root_name):\n", + " leafs = []\n", + " i = list(obo[root].subclasses())\n", + " for _ in i:\n", + " if len(_.subclasses()) == 0:\n", + " leafs.append(_.name)\n", + " else:\n", + " leafs.extend(get_obo_leafs(obo, _, _.name))\n", + " return leafs" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mStopIteration\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mC:\\Python39\\Lib\\inspect.py:2927\u001b[0m, in \u001b[0;36mSignature._bind\u001b[1;34m(self, args, kwargs, partial)\u001b[0m\n\u001b[0;32m 2926\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 2927\u001b[0m arg_val \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39;49m(arg_vals)\n\u001b[0;32m 2928\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m:\n\u001b[0;32m 2929\u001b[0m \u001b[39m# No more positional arguments\u001b[39;00m\n", + "\u001b[1;31mStopIteration\u001b[0m: ", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[12], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[39mlist\u001b[39;49m(ncbi[\u001b[39m'\u001b[39;49m\u001b[39mNCBITaxon:1\u001b[39;49m\u001b[39m'\u001b[39;49m]\u001b[39m.\u001b[39;49msubclasses())\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\logic\\lineage.py:296\u001b[0m, in 
\u001b[0;36mLineageIterator.__next__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 294\u001b[0m \u001b[39mif\u001b[39;00m id_ \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 295\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m\n\u001b[1;32m--> 296\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_entity(id_)\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\logic\\lineage.py:304\u001b[0m, in \u001b[0;36mTermIterator._get_entity\u001b[1;34m(self, id)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_get_entity\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39mid\u001b[39m):\n\u001b[1;32m--> 304\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_ontology\u001b[39m.\u001b[39;49mget_term(\u001b[39mid\u001b[39;49m)\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\pronto\\utils\\meta.py:86\u001b[0m, in \u001b[0;36mtypechecked.__call__..newfunc\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[39m@functools\u001b[39m\u001b[39m.\u001b[39mwraps(func)\n\u001b[0;32m 84\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mnewfunc\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[0;32m 85\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_disable:\n\u001b[1;32m---> 86\u001b[0m callargs \u001b[39m=\u001b[39m signature\u001b[39m.\u001b[39mbind(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\u001b[39m.\u001b[39marguments\n\u001b[0;32m 87\u001b[0m \u001b[39mfor\u001b[39;00m name, value \u001b[39min\u001b[39;00m callargs\u001b[39m.\u001b[39mitems():\n\u001b[0;32m 88\u001b[0m \u001b[39mif\u001b[39;00m name \u001b[39min\u001b[39;00m hints:\n", + "File \u001b[1;32mC:\\Python39\\Lib\\inspect.py:3050\u001b[0m, in 
\u001b[0;36mSignature.bind\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 3045\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mbind\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m/\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[0;32m 3046\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Get a BoundArguments object, that maps the passed `args`\u001b[39;00m\n\u001b[0;32m 3047\u001b[0m \u001b[39m and `kwargs` to the function's signature. Raises `TypeError`\u001b[39;00m\n\u001b[0;32m 3048\u001b[0m \u001b[39m if the passed arguments can not be bound.\u001b[39;00m\n\u001b[0;32m 3049\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 3050\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_bind(args, kwargs)\n", + "File \u001b[1;32mC:\\Python39\\Lib\\inspect.py:2928\u001b[0m, in \u001b[0;36mSignature._bind\u001b[1;34m(self, args, kwargs, partial)\u001b[0m\n\u001b[0;32m 2926\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 2927\u001b[0m arg_val \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39m(arg_vals)\n\u001b[1;32m-> 2928\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;49;00m:\n\u001b[0;32m 2929\u001b[0m \u001b[39m# No more positional arguments\u001b[39;00m\n\u001b[0;32m 2930\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 2931\u001b[0m param \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39m(parameters)\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "list(ncbi['NCBITaxon:1'].subclasses())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "object of type 'SubclassesHandler' has no len()", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[7], line 
1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m leafs_list \u001b[39m=\u001b[39m get_obo_leafs(ncbi, \u001b[39m'\u001b[39;49m\u001b[39mNCBITaxon:1\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39mroot\u001b[39;49m\u001b[39m'\u001b[39;49m)\n", + "Cell \u001b[1;32mIn[6], line 6\u001b[0m, in \u001b[0;36mget_obo_leafs\u001b[1;34m(obo, root, root_name)\u001b[0m\n\u001b[0;32m 4\u001b[0m i \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(obo[root]\u001b[39m.\u001b[39msubclasses())\n\u001b[0;32m 5\u001b[0m \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m i:\n\u001b[1;32m----> 6\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39;49m(_\u001b[39m.\u001b[39;49msubclasses()) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m 7\u001b[0m leafs\u001b[39m.\u001b[39mappend(_\u001b[39m.\u001b[39mname)\n\u001b[0;32m 8\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "\u001b[1;31mTypeError\u001b[0m: object of type 'SubclassesHandler' has no len()" + ] + } + ], + "source": [ + "leafs_list = get_obo_leafs(ncbi, 'NCBITaxon:1', 'root')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_dict = get_obo_subclasses(ncbi, 'NCBITaxon:1', 'root', distance=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "taxonomy_nodes = transform_nested_dict_to_tree(root_dict)\n", + "all_taxonomy_elements = flatten(root_dict)\n", + "store_as_gzipped_json(root_dict, \"taxonomy_dict\")\n", + "store_as_gzipped_json(taxonomy_nodes, \"taxonomy_nodes\")\n", + "store_as_gzipped_json(all_taxonomy_elements, \"all_taxonomy_elements\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Very large dict, causes too long waiting times ==> split in different taxa?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "virus_dict = get_obo_subclasses(ncbi, 'NCBITaxon:10239', 'Viruses', distance=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "archaea_dict = get_obo_subclasses(ncbi, 'NCBITaxon:2157', 'Archaea', distance=1)\n", + "bacteria_dict = get_obo_subclasses(ncbi, 'NCBITaxon:2', 'Bacteria', distance=1)\n", + "other_sequences_dict = get_obo_subclasses(ncbi, 'NCBITaxon:28384', 'other sequences', distance=1)\n", + "unclassified_dict = get_obo_subclasses(ncbi, 'NCBITaxon:12908', 'unclassified entries', distance=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "eukaryota_dict = get_obo_subclasses(ncbi, 'NCBITaxon:2759', 'Eukaryota', distance=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "sp = get_obo_subclasses(ncbi, \"NCBITaxon:88918\", \"Trimeniaceae\", distance=1 )" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "sp\n", + "# count unique elements in nested dictionary\n", + "def count_unique_elements(nested_dict):\n", + " unique_elements = []\n", + " for key, value in nested_dict.items():\n", + " if isinstance(value, dict):\n", + " unique_elements.append(key)\n", + " unique_elements.extend(count_unique_elements(value))\n", + " else:\n", + " unique_elements.append(key)\n", + " return list(set(unique_elements))\n", + "sp_elem = count_unique_elements(sp)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['Eukaryota', 'Breviatea', 'Hemimastigophora', 'Rhodelphea', 'Haptista', 'CRuMs', 'Metamonada', 'Discoba', 'Eukaryota incertae sedis', 'Ancyromonadida', 'Sar', 'Rhodophyta', 'Malawimonadida', 'Provora', 
'Cryptophyceae', 'Viridiplantae', 'Opisthokonta', 'Glaucocystophyceae', 'unclassified eukaryotes', 'Apusozoa', 'Amoebozoa', 'environmental samples ', ('environmental samples ',), 'environmental samples'])" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eukaryota_dict.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1679405\n" + ] + } + ], + "source": [ + "print(len(euk_elem))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'eukaryota_dict' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[10], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m archaea_dict\u001b[39m.\u001b[39mpop(\u001b[39m'\u001b[39m\u001b[39mArchaea\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 2\u001b[0m bacteria_dict\u001b[39m.\u001b[39mpop(\u001b[39m'\u001b[39m\u001b[39mBacteria\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m eukaryota_dict\u001b[39m.\u001b[39mpop(\u001b[39m'\u001b[39m\u001b[39mEukaryota\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 4\u001b[0m other_sequences_dict\u001b[39m.\u001b[39mpop(\u001b[39m'\u001b[39m\u001b[39mother sequences\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 5\u001b[0m unclassified_dict\u001b[39m.\u001b[39mpop(\u001b[39m'\u001b[39m\u001b[39munclassified entries\u001b[39m\u001b[39m'\u001b[39m)\n", + "\u001b[1;31mNameError\u001b[0m: name 'eukaryota_dict' is not defined" + ] + } + ], + "source": [ + "archaea_dict.pop('Archaea')\n", + "bacteria_dict.pop('Bacteria')\n", + "eukaryota_dict.pop('Eukaryota')\n", + "other_sequences_dict.pop('other sequences')\n", + 
"unclassified_dict.pop('unclassified entries')\n", + "virus_dict.pop('Viruses')" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'virus_dict' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m taxonomies \u001b[39m=\u001b[39m [virus_dict, archaea_dict, bacteria_dict, eukaryota_dict, other_sequences_dict, unclassified_dict]\t\n\u001b[0;32m 2\u001b[0m names \u001b[39m=\u001b[39m [\u001b[39m\"\u001b[39m\u001b[39mvirus\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39marchaea\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mbacteria\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39meukaryota\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mother_sequences\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39munclassified\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m 3\u001b[0m \u001b[39mfor\u001b[39;00m i, name \u001b[39min\u001b[39;00m \u001b[39mzip\u001b[39m(taxonomies, names):\n", + "\u001b[1;31mNameError\u001b[0m: name 'virus_dict' is not defined" + ] + } + ], + "source": [ + "taxonomies = [virus_dict, archaea_dict, bacteria_dict, eukaryota_dict, other_sequences_dict, unclassified_dict]\t\n", + "names = [\"virus\", \"archaea\", \"bacteria\", \"eukaryota\", \"other_sequences\", \"unclassified\"]\n", + "for i, name in zip(taxonomies, names):\n", + " nodes = transform_nested_dict_to_tree(i)\n", + " elements = flatten(i)\n", + " store_as_gzipped_json(i, f\"{name}_dict\")\n", + " store_as_gzipped_json(nodes, f\"{name}_nodes\")\n", + " store_as_gzipped_json(set(elements), f\"all_{name}_elements\")\n", + " print(f\"{name} done\")" + ] + }, + { + "cell_type": 
"code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "unhashable type: 'collections.defaultdict'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[8], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m eu_elem \u001b[39m=\u001b[39m flatten(eukaryota_dict)\n", + "File \u001b[1;32mc:\\Users\\tinec\\OneDrive - UGent\\git\\SDRF_GUI\\ParsingModule.py:122\u001b[0m, in \u001b[0;36mflatten\u001b[1;34m(d)\u001b[0m\n\u001b[0;32m 120\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 121\u001b[0m items\u001b[39m.\u001b[39mappend(v)\n\u001b[1;32m--> 122\u001b[0m items \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\u001b[39mset\u001b[39;49m(items))\n\u001b[0;32m 123\u001b[0m \u001b[39mreturn\u001b[39;00m items\n", + "\u001b[1;31mTypeError\u001b[0m: unhashable type: 'collections.defaultdict'" + ] + } + ], + "source": [ + "eu_elem = flatten(eukaryota_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "from sys import getsizeof" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "67109080" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "getsizeof(euk_elem)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13533400" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "getsizeof(species_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1184" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "getsizeof(eukaryota_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-53673784" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "getsizeof(eu_elem) - getsizeof(set(eu_elem))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13434112" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "getsizeof(eu_elem) - getsizeof(eukaryota_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "import networkx\n", + "import obonet\n", + "\n", + "graph = obonet.read_obo(\"C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\ontology\\\\ncbitaxon.obo\")" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 2490774 nodes\n", + "There are 2490772 edges\n" + ] + } + ], + "source": [ + "print(f\"There are {len(graph)} nodes\") #number of nodes\n", + "print(f\"There are {graph.number_of_edges()} edges\") #number of edges" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "networkx.is_directed_acyclic_graph(graph)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': 'Haemoproteidae',\n", + " 'namespace': 'ncbi_taxonomy',\n", + " 'xref': ['GC_ID:1'],\n", + " 'is_a': ['NCBITaxon:5819'],\n", + " 'property_value': ['has_rank NCBITaxon:family']}" + ] + }, + "execution_count": 70, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.nodes[\"NCBITaxon:1639121\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "eukaryota_list = []\n", + "for i in networkx.ancestors(graph,\"NCBITaxon:2759\"):\n", + " eukaryota_list.append(graph.nodes[i][\"name\"])\n", + "\n", + "virus_list = []\n", + "for i in networkx.ancestors(graph,\"NCBITaxon:10239\"):\n", + " virus_list.append(graph.nodes[i][\"name\"])\n", + "\n", + "bacteria_list = []\n", + "for i in networkx.ancestors(graph,\"NCBITaxon:2\"):\n", + " bacteria_list.append(graph.nodes[i][\"name\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "236153 541922 1679404\n" + ] + } + ], + "source": [ + "print(len(virus_list), len(bacteria_list), len(eukaryota_list))" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13533400" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "getsizeof(species_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1679405" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(euk_elem)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "sdrf", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "22ea4e8e4bc62f1f2c468860a17a62e47bb896f26c043965a0be0ae51df573cc" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/OWL_parser.ipynb b/OWL_parser.ipynb new file mode 100644 index 0000000..7fbf87d --- /dev/null +++ b/OWL_parser.ipynb @@ -0,0 +1,491 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "* Owlready2 * Warning: optimized Cython parser module 'owlready2_optimized' is not available, defaulting to slower Python implementation\n" + ] + } + ], + "source": [ + "import requests\n", + "import rdflib\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "from rdflib import Graph, Namespace, RDFS\n", + "from owlready2 import get_ontology\n", + "from owlready2 import *\n", + "from rdflib import Graph, Namespace, RDFS" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[2], line 5\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcollections\u001b[39;00m \u001b[39mimport\u001b[39;00m defaultdict\n\u001b[0;32m 4\u001b[0m g \u001b[39m=\u001b[39m Graph()\n\u001b[1;32m----> 5\u001b[0m g\u001b[39m.\u001b[39;49mparse(\u001b[39m\"\u001b[39;49m\u001b[39mSDRF_GUI/ontology/ncbitaxon.owl\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mxml\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m 7\u001b[0m HANCESTRO \u001b[39m=\u001b[39m 
Namespace(\u001b[39m\"\u001b[39m\u001b[39mhttp://www.hancestro.org/ontologies/hancestro#\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 9\u001b[0m class_labels \u001b[39m=\u001b[39m defaultdict(\u001b[39mdict\u001b[39m)\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\graph.py:1330\u001b[0m, in \u001b[0;36mGraph.parse\u001b[1;34m(self, source, publicID, format, location, file, data, **args)\u001b[0m\n\u001b[0;32m 1327\u001b[0m parser \u001b[39m=\u001b[39m plugin\u001b[39m.\u001b[39mget(\u001b[39mformat\u001b[39m, Parser)()\n\u001b[0;32m 1328\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 1329\u001b[0m \u001b[39m# TODO FIXME: Parser.parse should have **kwargs argument.\u001b[39;00m\n\u001b[1;32m-> 1330\u001b[0m parser\u001b[39m.\u001b[39mparse(source, \u001b[39mself\u001b[39m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39margs)\n\u001b[0;32m 1331\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mSyntaxError\u001b[39;00m \u001b[39mas\u001b[39;00m se:\n\u001b[0;32m 1332\u001b[0m \u001b[39mif\u001b[39;00m could_not_guess_format:\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\plugins\\parsers\\rdfxml.py:604\u001b[0m, in \u001b[0;36mRDFXMLParser.parse\u001b[1;34m(self, source, sink, **args)\u001b[0m\n\u001b[0;32m 600\u001b[0m content_handler\u001b[39m.\u001b[39mpreserve_bnode_ids \u001b[39m=\u001b[39m preserve_bnode_ids\n\u001b[0;32m 601\u001b[0m \u001b[39m# # We're only using it once now\u001b[39;00m\n\u001b[0;32m 602\u001b[0m \u001b[39m# content_handler.reset()\u001b[39;00m\n\u001b[0;32m 603\u001b[0m \u001b[39m# self._parser.reset()\u001b[39;00m\n\u001b[1;32m--> 604\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parser\u001b[39m.\u001b[39;49mparse(source)\n", + "File \u001b[1;32mC:\\Python39\\Lib\\xml\\sax\\expatreader.py:111\u001b[0m, in \u001b[0;36mExpatParser.parse\u001b[1;34m(self, source)\u001b[0m\n\u001b[0;32m 109\u001b[0m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreset()\n\u001b[0;32m 110\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_cont_handler\u001b[39m.\u001b[39msetDocumentLocator(ExpatLocator(\u001b[39mself\u001b[39m))\n\u001b[1;32m--> 111\u001b[0m xmlreader\u001b[39m.\u001b[39;49mIncrementalParser\u001b[39m.\u001b[39;49mparse(\u001b[39mself\u001b[39;49m, source)\n\u001b[0;32m 112\u001b[0m \u001b[39mexcept\u001b[39;00m:\n\u001b[0;32m 113\u001b[0m \u001b[39m# bpo-30264: Close the source on error to not leak resources:\u001b[39;00m\n\u001b[0;32m 114\u001b[0m \u001b[39m# xml.sax.parse() doesn't give access to the underlying parser\u001b[39;00m\n\u001b[0;32m 115\u001b[0m \u001b[39m# to the caller\u001b[39;00m\n\u001b[0;32m 116\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_close_source()\n", + "File \u001b[1;32mC:\\Python39\\Lib\\xml\\sax\\xmlreader.py:125\u001b[0m, in \u001b[0;36mIncrementalParser.parse\u001b[1;34m(self, source)\u001b[0m\n\u001b[0;32m 123\u001b[0m buffer \u001b[39m=\u001b[39m file\u001b[39m.\u001b[39mread(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_bufsize)\n\u001b[0;32m 124\u001b[0m \u001b[39mwhile\u001b[39;00m buffer:\n\u001b[1;32m--> 125\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfeed(buffer)\n\u001b[0;32m 126\u001b[0m buffer \u001b[39m=\u001b[39m file\u001b[39m.\u001b[39mread(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_bufsize)\n\u001b[0;32m 127\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n", + "File \u001b[1;32mC:\\Python39\\Lib\\xml\\sax\\expatreader.py:217\u001b[0m, in \u001b[0;36mExpatParser.feed\u001b[1;34m(self, data, isFinal)\u001b[0m\n\u001b[0;32m 210\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_cont_handler\u001b[39m.\u001b[39mstartDocument()\n\u001b[0;32m 212\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 213\u001b[0m \u001b[39m# The isFinal parameter is internal to the expat reader.\u001b[39;00m\n\u001b[0;32m 214\u001b[0m \u001b[39m# If it is set to true, expat will check 
validity of the entire\u001b[39;00m\n\u001b[0;32m 215\u001b[0m \u001b[39m# document. When feeding chunks, they are not normally final -\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \u001b[39m# except when invoked from close.\u001b[39;00m\n\u001b[1;32m--> 217\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parser\u001b[39m.\u001b[39;49mParse(data, isFinal)\n\u001b[0;32m 218\u001b[0m \u001b[39mexcept\u001b[39;00m expat\u001b[39m.\u001b[39merror \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 219\u001b[0m exc \u001b[39m=\u001b[39m SAXParseException(expat\u001b[39m.\u001b[39mErrorString(e\u001b[39m.\u001b[39mcode), e, \u001b[39mself\u001b[39m)\n", + "File \u001b[1;32mC:\\A\\31\\s\\Modules\\pyexpat.c:407\u001b[0m, in \u001b[0;36mStartElement\u001b[1;34m()\u001b[0m\n", + "File \u001b[1;32mC:\\Python39\\Lib\\xml\\sax\\expatreader.py:369\u001b[0m, in \u001b[0;36mExpatParser.start_element_ns\u001b[1;34m(self, name, attrs)\u001b[0m\n\u001b[0;32m 366\u001b[0m newattrs[apair] \u001b[39m=\u001b[39m value\n\u001b[0;32m 367\u001b[0m qnames[apair] \u001b[39m=\u001b[39m qname\n\u001b[1;32m--> 369\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_cont_handler\u001b[39m.\u001b[39;49mstartElementNS(pair, \u001b[39mNone\u001b[39;49;00m,\n\u001b[0;32m 370\u001b[0m AttributesNSImpl(newattrs, qnames))\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\plugins\\parsers\\rdfxml.py:190\u001b[0m, in \u001b[0;36mRDFXMLHandler.startElementNS\u001b[1;34m(self, name, qname, attrs)\u001b[0m\n\u001b[0;32m 188\u001b[0m language \u001b[39m=\u001b[39m parent\u001b[39m.\u001b[39mlanguage\n\u001b[0;32m 189\u001b[0m current\u001b[39m.\u001b[39mlanguage \u001b[39m=\u001b[39m language\n\u001b[1;32m--> 190\u001b[0m current\u001b[39m.\u001b[39;49mstart(name, qname, attrs)\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\plugins\\parsers\\rdfxml.py:388\u001b[0m, in 
\u001b[0;36mRDFXMLHandler.property_element_start\u001b[1;34m(self, name, qname, attrs)\u001b[0m\n\u001b[0;32m 385\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 386\u001b[0m current\u001b[39m.\u001b[39mpredicate \u001b[39m=\u001b[39m absolutize(name)\n\u001b[1;32m--> 388\u001b[0m \u001b[39mid\u001b[39m \u001b[39m=\u001b[39m atts\u001b[39m.\u001b[39mget(RDFVOC\u001b[39m.\u001b[39;49mID, \u001b[39mNone\u001b[39;00m)\n\u001b[0;32m 389\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mid\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 390\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_ncname(\u001b[39mid\u001b[39m):\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\namespace\\__init__.py:238\u001b[0m, in \u001b[0;36mDefinedNamespaceMeta.__getattr__\u001b[1;34m(cls, name)\u001b[0m\n\u001b[0;32m 237\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__getattr__\u001b[39m(\u001b[39mcls\u001b[39m, name: \u001b[39mstr\u001b[39m):\n\u001b[1;32m--> 238\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39;49m\u001b[39m.\u001b[39;49m\u001b[39m__getitem__\u001b[39;49m(name)\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "#nested dict\n", + "from collections import defaultdict\n", + "\n", + "g = Graph()\n", + "g.parse(\"SDRF_GUI/ontology/hancestro.owl\", format=\"xml\")\n", + "\n", + "HANCESTRO = Namespace(\"http://www.hancestro.org/ontologies/hancestro#\")\n", + "\n", + "class_labels = defaultdict(dict)\n", + "pairs = []\n", + "all_elements = set()\n", + "for s, p, o in g.triples((None, RDFS.subClassOf, None)):\n", + " sub_class = g.value(s, RDFS.label).toPython()\n", + " super_class = g.value(o, RDFS.label)\n", + " all_elements.add(sub_class)\n", + " all_elements.add(super_class)\n", + " if super_class:\n", + " super_class = super_class.toPython()\n", + " else:\n", + " super_class = \"Thing\"\n", + " pairs.append((super_class, 
sub_class))\n", + " class_labels[super_class][sub_class] = class_labels[sub_class]\n", + " if sub_class in class_labels:\n", + " del class_labels[sub_class]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "d = defaultdict(dict)\n", + "root = 'ancestry category' # first parent value\n", + "for parent, child in pairs:\n", + " d[parent][child] = d[child]\n", + " if root == child:\n", + " root = parent\n", + "result = {root: d[root]}" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "def flatten(d): \n", + " res = [] # Result list\n", + " if isinstance(d, dict):\n", + " for key, val in d.items():\n", + " res.extend(flatten(val))\n", + " elif isinstance(d, list):\n", + " res = d \n", + " else:\n", + " raise TypeError(\"Undefined type for flatten: %s\"%type(d))\n", + "\n", + " return res" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['European', 'Asian', 'African', 'Native American', 'Hispanic or Latin American', 'Greater Middle Eastern (Middle Eastern, North African or Persian)', 'African American or Afro-Caribbean', 'Oceanian', 'uncategorised population', 'genetically isolated population', 'Aboriginal Australian', 'undefined ancestry population', 'Not available'])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ancestry_dict = result['Thing']['ancestry category']\n", + "ancestry_dict['Not available'] = {}\n", + "ancestry_dict.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Stored all_ancestry_category_elements as gzipped json'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from ParsingModule import * \n", + "ancestry_nodes = 
transform_nested_dict_to_tree(ancestry_dict)\n", + "all_ancestry_elements = flatten(ancestry_dict)\n", + "store_as_gzipped_json(ancestry_dict, \"ancestry_category_dict\")\n", + "store_as_gzipped_json(ancestry_nodes, \"ancestry_category_nodes\")\n", + "store_as_gzipped_json(all_ancestry_elements, \"all_ancestry_category_elements\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def depth(d):\n", + " if isinstance(d, dict):\n", + " return 1 + (max(map(depth, d.values())) if d else 0)\n", + " return 0" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def transform_nested_dict_to_tree(d, parent_label=None, parent_value=None):\n", + " result = []\n", + " for key, value in d.items():\n", + " label = key\n", + " if parent_label:\n", + " label = f\"{key}\"\n", + " children = []\n", + " if value:\n", + " children = transform_nested_dict_to_tree(value, label, key)\n", + " if children:\n", + " result.append({\"label\": label, \"value\": key, \"children\": children})\n", + " else:\n", + " result.append({\"label\": label, \"value\": key})\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "depth(ancestry_dict['European']['Spanish'])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['European', 'Asian', 'African', 'Native American', 'Hispanic or Latin American', 'Greater Middle Eastern (Middle Eastern, North African or Persian)', 'African American or Afro-Caribbean', 'Oceanian', 'uncategorised population', 'genetically isolated population', 'Aboriginal Australian', 'undefined ancestry population'])" + ] + }, + "execution_count": 30, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "ancestry_dict.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "ancestry_nodes = transform_nested_dict_to_tree(ancestry_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "all_ancestry_elements = flatten(ancestry_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "#save dict as pickle\n", + "import pickle\n", + "with open('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_StreamlitApp\\\\onto_dicts\\\\ancestry_dict.pickle', 'wb') as handle:\n", + " pickle.dump(ancestry_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + "# make streamlit tree\n", + "with open('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_StreamlitApp\\\\onto_dicts\\\\ancestry_nodes.pickle', 'wb') as handle:\n", + " pickle.dump(ancestry_nodes, handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + "# flatten into a list of elements\n", + "with open('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_StreamlitApp\\\\onto_dicts\\\\all_ancestry_elements.pickle', 'wb') as handle:\n", + " pickle.dump(all_ancestry_elements, handle, protocol=pickle.HIGHEST_PROTOCOL)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NCBITaxon\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " An ontology representation of the NCBI organismal taxonomy\n", + "\n", + " \n", + "\n", + " NCBI organismal classification\n", + "\n", + " Built by https://github.com/obophenotype/ncbitaxon\n", + "\n", + " \n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "\n", + " definition\n", + "\n", + " \n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "\n", + " oio:hasBroadSynonym\n", + "\n", + " acronym\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "# read first 20 lines of ncbitaxon owl file as txt file to determine the namespace\n", + "with open('C:\\\\Users\\\\tinec\\\\OneDrive - UGent\\\\git\\\\SDRF_GUI\\\\ontology\\\\ncbitaxon.owl', 'r') as f:\n", + " for i in range(50):\n", + " print(f.readline())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\plugins\\stores\\memory.py:247\u001b[0m, in \u001b[0;36mMemory.add\u001b[1;34m(self, triple, context, quoted)\u001b[0m\n\u001b[0;32m 246\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 247\u001b[0m po \u001b[39m=\u001b[39m spo[subject]\n\u001b[0;32m 248\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mLookupError\u001b[39;00m:\n", + "\u001b[1;31mKeyError\u001b[0m: rdflib.term.URIRef('http://purl.obolibrary.org/obo/NCBITaxon_2781590')", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[5], line 5\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcollections\u001b[39;00m \u001b[39mimport\u001b[39;00m defaultdict\n\u001b[0;32m 4\u001b[0m g \u001b[39m=\u001b[39m Graph()\n\u001b[1;32m----> 5\u001b[0m 
g\u001b[39m.\u001b[39;49mparse(\u001b[39m\"\u001b[39;49m\u001b[39mSDRF_GUI/ontology/ncbitaxon.owl\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mxml\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m 7\u001b[0m class_labels \u001b[39m=\u001b[39m defaultdict(\u001b[39mdict\u001b[39m)\n\u001b[0;32m 8\u001b[0m pairs \u001b[39m=\u001b[39m []\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\graph.py:1330\u001b[0m, in \u001b[0;36mGraph.parse\u001b[1;34m(self, source, publicID, format, location, file, data, **args)\u001b[0m\n\u001b[0;32m 1327\u001b[0m parser \u001b[39m=\u001b[39m plugin\u001b[39m.\u001b[39mget(\u001b[39mformat\u001b[39m, Parser)()\n\u001b[0;32m 1328\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 1329\u001b[0m \u001b[39m# TODO FIXME: Parser.parse should have **kwargs argument.\u001b[39;00m\n\u001b[1;32m-> 1330\u001b[0m parser\u001b[39m.\u001b[39mparse(source, \u001b[39mself\u001b[39m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39margs)\n\u001b[0;32m 1331\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mSyntaxError\u001b[39;00m \u001b[39mas\u001b[39;00m se:\n\u001b[0;32m 1332\u001b[0m \u001b[39mif\u001b[39;00m could_not_guess_format:\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\plugins\\parsers\\rdfxml.py:604\u001b[0m, in \u001b[0;36mRDFXMLParser.parse\u001b[1;34m(self, source, sink, **args)\u001b[0m\n\u001b[0;32m 600\u001b[0m content_handler\u001b[39m.\u001b[39mpreserve_bnode_ids \u001b[39m=\u001b[39m preserve_bnode_ids\n\u001b[0;32m 601\u001b[0m \u001b[39m# # We're only using it once now\u001b[39;00m\n\u001b[0;32m 602\u001b[0m \u001b[39m# content_handler.reset()\u001b[39;00m\n\u001b[0;32m 603\u001b[0m \u001b[39m# self._parser.reset()\u001b[39;00m\n\u001b[1;32m--> 604\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parser\u001b[39m.\u001b[39;49mparse(source)\n", + "File 
\u001b[1;32mC:\\Python39\\Lib\\xml\\sax\\expatreader.py:111\u001b[0m, in \u001b[0;36mExpatParser.parse\u001b[1;34m(self, source)\u001b[0m\n\u001b[0;32m 109\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreset()\n\u001b[0;32m 110\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_cont_handler\u001b[39m.\u001b[39msetDocumentLocator(ExpatLocator(\u001b[39mself\u001b[39m))\n\u001b[1;32m--> 111\u001b[0m xmlreader\u001b[39m.\u001b[39;49mIncrementalParser\u001b[39m.\u001b[39;49mparse(\u001b[39mself\u001b[39;49m, source)\n\u001b[0;32m 112\u001b[0m \u001b[39mexcept\u001b[39;00m:\n\u001b[0;32m 113\u001b[0m \u001b[39m# bpo-30264: Close the source on error to not leak resources:\u001b[39;00m\n\u001b[0;32m 114\u001b[0m \u001b[39m# xml.sax.parse() doesn't give access to the underlying parser\u001b[39;00m\n\u001b[0;32m 115\u001b[0m \u001b[39m# to the caller\u001b[39;00m\n\u001b[0;32m 116\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_close_source()\n", + "File \u001b[1;32mC:\\Python39\\Lib\\xml\\sax\\xmlreader.py:125\u001b[0m, in \u001b[0;36mIncrementalParser.parse\u001b[1;34m(self, source)\u001b[0m\n\u001b[0;32m 123\u001b[0m buffer \u001b[39m=\u001b[39m file\u001b[39m.\u001b[39mread(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_bufsize)\n\u001b[0;32m 124\u001b[0m \u001b[39mwhile\u001b[39;00m buffer:\n\u001b[1;32m--> 125\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfeed(buffer)\n\u001b[0;32m 126\u001b[0m buffer \u001b[39m=\u001b[39m file\u001b[39m.\u001b[39mread(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_bufsize)\n\u001b[0;32m 127\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n", + "File \u001b[1;32mC:\\Python39\\Lib\\xml\\sax\\expatreader.py:217\u001b[0m, in \u001b[0;36mExpatParser.feed\u001b[1;34m(self, data, isFinal)\u001b[0m\n\u001b[0;32m 210\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_cont_handler\u001b[39m.\u001b[39mstartDocument()\n\u001b[0;32m 212\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 
213\u001b[0m \u001b[39m# The isFinal parameter is internal to the expat reader.\u001b[39;00m\n\u001b[0;32m 214\u001b[0m \u001b[39m# If it is set to true, expat will check validity of the entire\u001b[39;00m\n\u001b[0;32m 215\u001b[0m \u001b[39m# document. When feeding chunks, they are not normally final -\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \u001b[39m# except when invoked from close.\u001b[39;00m\n\u001b[1;32m--> 217\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parser\u001b[39m.\u001b[39;49mParse(data, isFinal)\n\u001b[0;32m 218\u001b[0m \u001b[39mexcept\u001b[39;00m expat\u001b[39m.\u001b[39merror \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 219\u001b[0m exc \u001b[39m=\u001b[39m SAXParseException(expat\u001b[39m.\u001b[39mErrorString(e\u001b[39m.\u001b[39mcode), e, \u001b[39mself\u001b[39m)\n", + "File \u001b[1;32mC:\\A\\31\\s\\Modules\\pyexpat.c:407\u001b[0m, in \u001b[0;36mStartElement\u001b[1;34m()\u001b[0m\n", + "File \u001b[1;32mC:\\Python39\\Lib\\xml\\sax\\expatreader.py:369\u001b[0m, in \u001b[0;36mExpatParser.start_element_ns\u001b[1;34m(self, name, attrs)\u001b[0m\n\u001b[0;32m 366\u001b[0m newattrs[apair] \u001b[39m=\u001b[39m value\n\u001b[0;32m 367\u001b[0m qnames[apair] \u001b[39m=\u001b[39m qname\n\u001b[1;32m--> 369\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_cont_handler\u001b[39m.\u001b[39;49mstartElementNS(pair, \u001b[39mNone\u001b[39;49;00m,\n\u001b[0;32m 370\u001b[0m AttributesNSImpl(newattrs, qnames))\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\plugins\\parsers\\rdfxml.py:190\u001b[0m, in \u001b[0;36mRDFXMLHandler.startElementNS\u001b[1;34m(self, name, qname, attrs)\u001b[0m\n\u001b[0;32m 188\u001b[0m language \u001b[39m=\u001b[39m parent\u001b[39m.\u001b[39mlanguage\n\u001b[0;32m 189\u001b[0m current\u001b[39m.\u001b[39mlanguage \u001b[39m=\u001b[39m language\n\u001b[1;32m--> 190\u001b[0m current\u001b[39m.\u001b[39;49mstart(name, qname, attrs)\n", + "File 
\u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\plugins\\parsers\\rdfxml.py:328\u001b[0m, in \u001b[0;36mRDFXMLHandler.node_element_start\u001b[1;34m(self, name, qname, attrs)\u001b[0m\n\u001b[0;32m 325\u001b[0m subject \u001b[39m=\u001b[39m BNode()\n\u001b[0;32m 327\u001b[0m \u001b[39mif\u001b[39;00m name \u001b[39m!=\u001b[39m RDFVOC\u001b[39m.\u001b[39mDescription: \u001b[39m# S1\u001b[39;00m\n\u001b[1;32m--> 328\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mstore\u001b[39m.\u001b[39;49madd((subject, RDF\u001b[39m.\u001b[39;49mtype, absolutize(name)))\n\u001b[0;32m 330\u001b[0m language \u001b[39m=\u001b[39m current\u001b[39m.\u001b[39mlanguage\n\u001b[0;32m 331\u001b[0m \u001b[39mfor\u001b[39;00m att \u001b[39min\u001b[39;00m atts:\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\graph.py:457\u001b[0m, in \u001b[0;36mGraph.add\u001b[1;34m(self, triple)\u001b[0m\n\u001b[0;32m 455\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(p, Node), \u001b[39m\"\u001b[39m\u001b[39mPredicate \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m must be an rdflib term\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m (p,)\n\u001b[0;32m 456\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(o, Node), \u001b[39m\"\u001b[39m\u001b[39mObject \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m must be an rdflib term\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m (o,)\n\u001b[1;32m--> 457\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__store\u001b[39m.\u001b[39;49madd((s, p, o), \u001b[39mself\u001b[39;49m, quoted\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)\n\u001b[0;32m 458\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\n", + "File \u001b[1;32mc:\\Users\\tinec\\anaconda3\\envs\\sdrf\\lib\\site-packages\\rdflib\\plugins\\stores\\memory.py:248\u001b[0m, in \u001b[0;36mMemory.add\u001b[1;34m(self, triple, context, 
quoted)\u001b[0m\n\u001b[0;32m 246\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 247\u001b[0m po \u001b[39m=\u001b[39m spo[subject]\n\u001b[1;32m--> 248\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mLookupError\u001b[39;49;00m:\n\u001b[0;32m 249\u001b[0m po \u001b[39m=\u001b[39m spo[subject] \u001b[39m=\u001b[39m {}\n\u001b[0;32m 250\u001b[0m \u001b[39mtry\u001b[39;00m:\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "#nested dict\n", + "from collections import defaultdict\n", + "\n", + "g = Graph()\n", + "g.parse(\"SDRF_GUI/ontology/ncbitaxon.owl\", format=\"xml\")\n", + "\n", + "class_labels = defaultdict(dict)\n", + "pairs = []\n", + "all_elements = set()\n", + "for s, p, o in g.triples((None, RDFS.subClassOf, None)):\n", + " sub_class = g.value(s, RDFS.label).toPython()\n", + " super_class = g.value(o, RDFS.label)\n", + " all_elements.add(sub_class)\n", + " all_elements.add(super_class)\n", + " if super_class:\n", + " super_class = super_class.toPython()\n", + " else:\n", + " super_class = \"Thing\"\n", + " pairs.append((super_class, sub_class))\n", + " class_labels[super_class][sub_class] = class_labels[sub_class]\n", + " if sub_class in class_labels:\n", + " del class_labels[sub_class]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "sdrf", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "22ea4e8e4bc62f1f2c468860a17a62e47bb896f26c043965a0be0ae51df573cc" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ParsingModule.py b/ParsingModule.py new file mode 100644 index 
# ParsingModule: ontology-parsing helpers and Streamlit widgets for the SDRF GUI.
import pronto
from pronto import Ontology
from collections import defaultdict
import json
import gzip
import os
import pickle
import streamlit as st
import numpy as np
import pandas as pd
import re
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode, DataReturnMode
from streamlit_tree_select import tree_select

# Folder (relative to the working directory) holding the gzipped JSON files.
_DATA_DIR = os.path.join(".", "data")
# Placeholder written into NaN cells while a frame is shown in AgGrid.
_EMPTY = "empty"
# Highlight colour for the cells the user is expected to edit.
_CELL_STYLE = {"background-color": "#ffa478"}
# Separator transform_nested_dict_to_tree puts between the labels of a path.
_TREE_SEPARATOR = " , "
# Accepted age notation, e.g. "1Y 2M 3D".
_AGE_PATTERN = re.compile(r"^\d+Y\s\d+M\s\d+D$")
# Above this number of matches the species search asks for a narrower term.
_MAX_SHOWN_OPTIONS = 500


def help():
    """Print a one-line description of each parsing helper in this module."""
    for line in (
        "This module contains all parsable functions necessary to build the SDRF GUI",
        "The get_json_subclasses function returns a nested dictionary of all subclasses of a given term in a json ontology",
        "The get_obo_subclasses function returns a nested dictionary of all subclasses of a given term in an obo ontology",
        "The flatten function returns a list of all values in a nested dictionary",
        "The transform_nested_dict_to_tree function returns a list of dictionaries that can be used to build a tree in streamlit",
        "The store_as_gzipped_json function stores a dictionary/list as a gzipped json file",
        "The open_gzipped_json function opens a gzipped json file and returns the dictionary/list",
    ):
        print(line)


def get_json_subclasses(ontology, term_id, term_label, d, nodes_dict=None, data=None):
    """Build a nested dictionary of all subclasses of a term in a JSON ontology.

    Args:
        ontology: Path to the ontology file in (OBO-graph) JSON format.
        term_id: Id of the root term, e.g. http://www.ebi.ac.uk/efo/EFO_0000635.
        term_label: Label of the root term, e.g. 'organism part'.
        d: Dictionary that is filled in place; pass ``{}`` for a fresh tree.
        nodes_dict: Internal, id -> label map shared between recursive calls.
        data: Internal, the parsed ontology shared between recursive calls.

    Returns:
        ``d`` with ``d[term_label]`` holding the nested subclasses, or a
        message string when ``term_id`` is not a node of the ontology.
    """
    if nodes_dict is None:  # first call: load the json file only once
        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(ontology, encoding="utf-8") as f:
            data = json.load(f)
        nodes_dict = {
            node["id"]: node["lbl"]
            for node in data["graphs"][0]["nodes"]
            if "id" in node and "lbl" in node
        }

    if term_id not in nodes_dict:
        return f"{term_id} node not in ontology"  # node not found, return early

    if term_label not in d:
        d[term_label] = {}  # add the current term to the dictionary

    wanted_predicates = ("http://purl.obolibrary.org/obo/BFO_0000050", "is_a")
    for edge in data["graphs"][0]["edges"]:
        if edge["obj"] != term_id or edge["pred"] not in wanted_predicates:
            continue
        # The subject of an edge pointing at term_id is a child of term_id.
        child_id = edge["sub"]
        if child_id == "http://purl.obolibrary.org/obo/MONDO_0011876":
            continue  # skip MONDO_0011876
        child_label = nodes_dict.get(child_id)
        if child_label is None:
            continue
        if child_label in d:
            # Already collected as a sibling: move that subtree under this term.
            d[term_label][child_label] = d[child_label]
            del d[child_label]
        else:
            d[term_label][child_label] = {}
        get_json_subclasses(
            ontology, child_id, child_label, d[term_label], nodes_dict, data
        )

    return d


def remove_duplicate_values(d):
    """Recursively drop every key that is repeated directly inside its own value.

    Modifies ``d`` in place and returns it.
    """
    for k, v in d.items():
        if isinstance(v, dict):
            remove_duplicate_values(v)
            if k in v:
                del v[k]

    return d


def get_obo_subclasses(onto, obo_id, obo_label, d=None, distance=1):
    """Build a nested dictionary of all subclasses of a term in an obo ontology.

    This function is built on pronto.

    Args:
        onto: Loaded pronto ontology (indexable by term id).
        obo_id: Id of the root term, e.g. MS:1000031.
        obo_label: Label of the root term, e.g. 'instrument model'.
        d: Dictionary to fill; a new one is created when omitted.
        distance: Kept for backward compatibility. Only direct subclasses
            (distance 1) are requested per level; deeper levels are reached
            through recursion.

    Returns:
        Nested dictionary of the subclasses, ``{}`` for a term without any.
    """
    if d is None:
        d = defaultdict(dict)

    subclasses = list(onto[obo_id].subclasses(distance=1))
    if len(subclasses) > 1:
        d[obo_label] = {}
        # The first entry is skipped, as in the original code
        # (presumably the term itself - verify against pronto).
        for sub in subclasses[1:]:
            d[sub.name] = get_obo_subclasses(
                onto, sub.id, sub.name, defaultdict(dict), distance=1
            )
    else:
        d = {}

    # Each recursive result also contains its own label; strip those repeats.
    return remove_duplicate_values(d)


def flatten(d):
    """Return all unique keys and non-dictionary values of a nested dictionary.

    Raises:
        TypeError: if ``d`` is not a dictionary.
    """
    if not isinstance(d, dict):
        raise TypeError("Input is not a dictionary")
    items = set()
    for k, v in d.items():
        items.add(k)
        if isinstance(v, dict):
            items.update(flatten(v))  # collect the nested level as well
        else:
            items.add(v)
    return list(items)


def transform_nested_dict_to_tree(d, parent_label=None, parent_value=None):
    """Convert a nested dictionary into the node list used by streamlit_tree_select.

    Each node is ``{"label": key, "value": path}``, where ``path`` is the chain
    of labels from the root joined by " , ", plus ``"children"`` when the key
    has nested entries. Values that are not dictionaries are treated as leaves.

    Args:
        d: Nested dictionary to convert.
        parent_label: Path of the parent node (used by the recursion).
        parent_value: Unused; kept for backward compatibility.

    Raises:
        TypeError: if ``d`` is not a dictionary.
    """
    if not isinstance(d, dict):
        raise TypeError("Input is not a dictionary")
    result = []
    for key, value in d.items():
        label = f"{parent_label} , {key}" if parent_label else key
        node = {"label": key, "value": label}
        if isinstance(value, dict) and value:
            children = transform_nested_dict_to_tree(value, label, key)
            if children:
                node["children"] = children
        result.append(node)
    return result


def _gzipped_json_path(filename):
    """Return the path of ``<filename>.json.gz`` inside the data folder."""
    # os.path.join keeps this working on non-Windows hosts as well.
    return os.path.join(_DATA_DIR, filename + ".json.gz")


def store_as_gzipped_json(data, filename):
    """Store ``data`` as ``<filename>.json.gz`` in the data folder.

    Returns a short confirmation message.
    """
    with gzip.open(_gzipped_json_path(filename), "wt", encoding="utf-8") as f:
        json.dump(data, f)
    return f"Stored {filename} as gzipped json"


def open_gzipped_json(filename):
    """Load and return the data stored as ``<filename>.json.gz`` in the data folder."""
    with gzip.open(_gzipped_json_path(filename), "rt", encoding="utf-8") as f:
        return json.load(f)


def _show_editable_grid(df, builder):
    """Render ``df`` in AgGrid with the shared grid options and return the edited frame."""
    builder.configure_grid_options(
        enableRangeSelection=True,
        enableFillHandle=True,
        suppressMovableColumns=True,
        singleClickEdit=True,
    )
    grid_return = AgGrid(
        df,
        gridOptions=builder.build(),
        update_mode=GridUpdateMode.MANUAL,
        data_return_mode=DataReturnMode.AS_INPUT,
    )
    edited = grid_return["data"]
    # Turn the placeholder back into real missing values.
    edited.replace(_EMPTY, np.nan, inplace=True)
    return edited


def fill_in_from_list(df, column, values_list=None, multiple_in_one=False):
    """Let the user fill in one column of ``df``, optionally from a list of values.

    - No list (``None`` or empty): the column is freely editable.
    - Exactly one value: the column is filled with that value.
    - More than one value: the column gets a dropdown with these values,
      preceded by an empty entry and "NA".
    - ``multiple_in_one``: extra ``<column>_<i>`` columns with the same
      dropdown are added.

    ``df`` is modified in place (NaN cells are temporarily shown as "empty");
    ``values_list`` is left untouched. Returns the edited dataframe.
    """
    columns_to_adapt = [column]
    df.fillna(_EMPTY, inplace=True)
    builder = GridOptionsBuilder.from_dataframe(df)

    if values_list and len(values_list) == 1:
        df[column] = values_list[0]
        df.replace(_EMPTY, np.nan, inplace=True)
        return df

    if values_list:
        # Build a new list: inserting into the caller's list would add another
        # "" / "NA" pair on every Streamlit rerun.
        options = ["", "NA", *values_list]
        if multiple_in_one:
            for i in range(len(options) - 1):
                df[f"{column}_{i}"] = ""
                columns_to_adapt.append(f"{column}_{i}")
            builder.configure_columns(
                columns_to_adapt,
                editable=True,
                cellEditor="agSelectCellEditor",
                cellEditorParams={"values": options},
                cellStyle=_CELL_STYLE,
            )
        else:
            builder.configure_column(
                column,
                editable=True,
                cellEditor="agSelectCellEditor",
                cellEditorParams={"values": options},
                cellStyle=_CELL_STYLE,
            )
    else:
        # No values to choose from: plain free-text editing.
        builder.configure_column(column, editable=True, cellStyle=_CELL_STYLE)

    return _show_editable_grid(df, builder)


def multiple_ontology_tree(column, element_list, nodes, df, multiple_in_one=False):
    """Ask the user for ontology terms for ``column`` and write them into ``df``.

    The user states how many different terms occur, picks them through an
    autocomplete box and/or the ontology tree, and then either the single
    chosen term is written to the whole column or an editable grid with
    in-cell dropdowns of the chosen terms is shown.

    Args:
        column: Name of the column to fill (created when missing).
        element_list: All terms offered in the autocomplete box (not modified).
        nodes: Tree nodes for ``tree_select``.
        df: Dataframe to edit (modified in place).
        multiple_in_one: Offer extra ``<column>_<i>`` columns for samples
            that carry several terms.

    Returns:
        The edited dataframe.
    """
    if column not in df.columns:
        df[column] = np.nan
    index = df.columns.get_loc(column)
    col1, col2, col3 = st.columns(3)
    columns_to_adapt = [column]
    with col1:
        multiple = st.radio(f"Are there multiple {column} in your data?", ("No", "Yes"))
    if multiple == "Yes":
        with col2:
            number = st.number_input(
                f"How many different {column} are in your data?",
                min_value=0,
                step=1,
            )
        with col3:
            if multiple_in_one:
                multiple_in_one_sel = st.radio(
                    f"Are there multiple {column} within one sample?", ("No", "Yes")
                )
                if multiple_in_one_sel == "Yes":
                    for i in range(number - 1):
                        extra = f"{column}_{i+1}"
                        # add column next to the original one if it is not there yet
                        if extra not in df.columns:
                            df.insert(index + 1, extra, "empty")
                        columns_to_adapt.append(extra)
    else:
        number = 1

    with st.form(
        "Select here your ontology terms using the autocomplete function or the ontology-based tree menu",
        clear_on_submit=True,
    ):
        col4, col5 = st.columns(2)
        with col4:
            # De-duplicated, stably ordered copy: the caller's list is left
            # alone and the options keep the same order across reruns.
            options = sorted(set(element_list) | {" "}, key=str)
            return_search = st.multiselect(
                "Select your matching ontology term using this autocomplete function",
                options,
                max_selections=number,
            )

        with col5:
            st.write("Or follow the ontology based drop down menu below")
            return_select = tree_select(
                nodes, no_cascade=True, expand_on_click=True, check_model="leaf"
            )
        chosen = return_search + return_select["checked"]
        # Tree values are full paths joined by " , "; keep only the last label.
        # Splitting on the full separator leaves terms that contain a comma intact.
        chosen = [
            term.split(_TREE_SEPARATOR)[-1] for term in chosen if term is not None
        ]
        if len(chosen) >= 1 and len(chosen) != number:
            st.error(f"You need to select a total of {number}.")
        submitted = st.form_submit_button("Submit selection")
        if submitted:
            st.write(f"Selection contains: {chosen}")

    if submitted and len(chosen) == 1 and number == 1:
        # One term for the whole data set: fill the column directly.
        df[column] = chosen[0]
        st.experimental_rerun()
    else:
        df.fillna(_EMPTY, inplace=True)
        st.write(f"If all cells are correctly filled in click twice on the update button")
        builder = GridOptionsBuilder.from_dataframe(df)
        builder.configure_columns(
            columns_to_adapt,
            editable=True,
            cellEditor="agSelectCellEditor",
            cellEditorParams={"values": chosen},
            cellStyle=_CELL_STYLE,
        )
        df = _show_editable_grid(df, builder)
    return df


def check_df_for_ontology_terms(df, columns_to_check, column_ontology_dict):
    """Check whether the given columns of ``df`` only contain accepted terms.

    For a column such as ``characteristics[cell type]`` the accepted terms are
    looked up under ``all_cell_type_elements`` in ``column_ontology_dict``.
    ``characteristics[age]`` and ``characteristics[sex]`` get format checks.
    Results are reported through Streamlit; when the user ticks "Yes" the
    offending columns are cleared in place. Nothing is returned.
    """
    clear_columns = []

    def flag(col):
        """Remember a column to clear, once."""
        if col not in clear_columns:
            clear_columns.append(col)

    for col in columns_to_check:
        name = (col.split('[')[-1].split(']')[0]).replace(' ', '_')
        name = 'all_' + name + '_elements'
        # if the column is an ontology column
        if name in column_ontology_dict:
            onto_elements = set(column_ontology_dict[name])
            # missing values are not terms; pd.notna also catches NaN objects
            # that are not the np.nan singleton
            elements = [e for e in df[col].unique() if pd.notna(e)]
            not_in_onto = set(elements) - onto_elements
            if not_in_onto:
                st.error(f'The following elements are not in the ontology: {not_in_onto}')
                flag(col)
            elif len(elements) >= 1:
                st.success(f'The column {col} contains only ontology terms')
        if col == 'characteristics[age]':
            if not check_age_format(df, 'characteristics[age]'):
                st.error(f'The age format is not correct. Please use the following format: 1Y 2M 3D')
                flag(col)
        if col == 'characteristics[sex]':
            # Series.values is an attribute, not a method; unique() on the
            # non-missing entries gives the distinct values.
            uniques = set(df[col].dropna().unique())
            not_in_onto = uniques - {'M', 'F', 'unknown'}
            if not_in_onto:
                st.error(f'{not_in_onto} are not accepted in the characteristics[sex] column. Please use M, F or unknown')
                flag(col)

    # if there are columns with unaccepted values, ask if the user wants to clear them
    if len(clear_columns) >= 1:
        st.error(f'The following columns contain elements that are not in the ontology: {clear_columns}')
        st.write('Do you want to clear these columns?')
        y = st.checkbox("Yes")
        st.checkbox("No")  # shown for the user; only "Yes" triggers an action
        if y:
            for col in clear_columns:
                df[col] = np.nan
                st.success(f'Column {col} has been cleared')


def check_age_format(df, column):
    """Return True when every filled-in value of ``column`` looks like "1Y 2M 3D".

    Missing values and the placeholders "", "empty" and "None" are skipped.
    """
    for value in df[column]:
        if pd.isna(value):
            continue  # re.match would raise TypeError on NaN
        if value in ("", "empty", "None"):
            continue
        st.write(value)
        if not _AGE_PATTERN.match(str(value)):
            return False
    return True


def convert_df(df):
    """Order the columns of ``df`` and return it as UTF-8 encoded TSV bytes.

    Column order: "source name" (when present), the characteristics columns
    (alphabetically), all other columns, the comment columns (alphabetically).
    """
    # NOTE: the module used to define convert_df twice; the earlier comma-CSV
    # version was always shadowed by this one, so only this one is kept.
    cols = df.columns.tolist()
    characteristic_cols = sorted(c for c in cols if c.startswith("characteristic"))
    comment_cols = sorted(c for c in cols if c.startswith("comment"))
    reserved = set(characteristic_cols) | set(comment_cols) | {"source name"}
    other_cols = [c for c in cols if c not in reserved]
    # Only put "source name" first when the frame actually has it.
    first = ["source name"] if "source name" in cols else []
    new_cols = first + characteristic_cols + other_cols + comment_cols
    return df[new_cols].to_csv(index=False, sep="\t").encode("utf-8")


def autocomplete_species_search(taxum_list, search_term):
    """Let the user pick species from ``taxum_list`` that match ``search_term``.

    Returns:
        The exact (case-insensitive) match as a string when the user confirms
        it, the list of selected close matches when the user confirms those,
        and ``None`` otherwise.
    """
    col1, col2 = st.columns(2)
    if search_term is None or search_term == "":
        return None

    needle = search_term.lower()
    filtered_options = [t for t in taxum_list if needle in t.lower()]
    exact_match = [t for t in taxum_list if needle == t.lower()]
    if exact_match:
        with col1:
            st.write(f"An exact match was found: **{exact_match[0]}**")
        with col2:
            use_exact_match = st.checkbox("Use exact match", key=f"exact_{search_term}")
            if use_exact_match:
                return exact_match[0]

    if not filtered_options:
        st.write("No options found. Please refine your search.")
    elif len(filtered_options) >= _MAX_SHOWN_OPTIONS:
        # ">=" so that exactly 500 matches also produce feedback
        st.write("Too many closely related options to display. Please refine your search.")
    else:
        with col1:
            selected_options = st.multiselect(
                "Some options closely matching your search time could be found",
                filtered_options,
            )
        with col2:
            if selected_options:
                st.write("You selected:", selected_options)
                use_options = st.checkbox(
                    "Use selected options", key=f"selected_{search_term}"
                )
                if use_options:
                    return selected_options
    return None
diff --git a/__pycache__/ParsingModule.cpython-39.pyc b/__pycache__/ParsingModule.cpython-39.pyc new file mode 100644 index 0000000..2819812 Binary files /dev/null and b/__pycache__/ParsingModule.cpython-39.pyc differ diff --git a/__pycache__/multiple.cpython-39.pyc b/__pycache__/multiple.cpython-39.pyc new file mode 100644 index 0000000..822047b Binary files /dev/null and b/__pycache__/multiple.cpython-39.pyc differ diff --git a/__pycache__/streamlit.cpython-310.pyc b/__pycache__/streamlit.cpython-310.pyc new file mode 100644 index 0000000..d7da2f6 Binary files /dev/null and b/__pycache__/streamlit.cpython-310.pyc differ diff --git a/__pycache__/streamlit.cpython-39.pyc b/__pycache__/streamlit.cpython-39.pyc new file mode 100644 index 0000000..8079c5f Binary files /dev/null and b/__pycache__/streamlit.cpython-39.pyc differ diff --git a/data/alkylation_dict.json.gz b/data/alkylation_dict.json.gz new file mode 100644 index 0000000..26d5f83 Binary files /dev/null and b/data/alkylation_dict.json.gz differ diff --git a/data/alkylation_nodes.json.gz b/data/alkylation_nodes.json.gz new file mode 100644 index 0000000..c57d591 Binary files /dev/null and b/data/alkylation_nodes.json.gz differ diff --git a/data/all_alkylation_elements.json.gz b/data/all_alkylation_elements.json.gz new file mode 100644 index 0000000..be1d252 Binary files /dev/null and b/data/all_alkylation_elements.json.gz differ diff --git a/data/all_ancestry_category_elements.json.gz b/data/all_ancestry_category_elements.json.gz new file mode 100644 index 0000000..2b9e292 Binary files /dev/null and b/data/all_ancestry_category_elements.json.gz differ diff --git a/data/all_archaea_elements.json.gz b/data/all_archaea_elements.json.gz new file mode 100644 index 0000000..21a862b Binary files /dev/null and b/data/all_archaea_elements.json.gz differ diff --git a/data/all_bacteria_elements.json.gz b/data/all_bacteria_elements.json.gz new file mode 100644 index 0000000..b4f14e5 Binary files /dev/null and 
b/data/all_bacteria_elements.json.gz differ diff --git a/data/all_cell_elements.json.gz b/data/all_cell_elements.json.gz new file mode 100644 index 0000000..ced71e5 Binary files /dev/null and b/data/all_cell_elements.json.gz differ diff --git a/data/all_cell_line_elements.json.gz b/data/all_cell_line_elements.json.gz new file mode 100644 index 0000000..d26aebb Binary files /dev/null and b/data/all_cell_line_elements.json.gz differ diff --git a/data/all_cell_type_elements.json.gz b/data/all_cell_type_elements.json.gz new file mode 100644 index 0000000..8a17f99 Binary files /dev/null and b/data/all_cell_type_elements.json.gz differ diff --git a/data/all_cleavage_agent_elements.json.gz b/data/all_cleavage_agent_elements.json.gz new file mode 100644 index 0000000..d6aa313 Binary files /dev/null and b/data/all_cleavage_agent_elements.json.gz differ diff --git a/data/all_cleavage_list_elements.json.gz b/data/all_cleavage_list_elements.json.gz new file mode 100644 index 0000000..36fffcb Binary files /dev/null and b/data/all_cleavage_list_elements.json.gz differ diff --git a/data/all_developmental_stage_elements.json.gz b/data/all_developmental_stage_elements.json.gz new file mode 100644 index 0000000..da2ebae Binary files /dev/null and b/data/all_developmental_stage_elements.json.gz differ diff --git a/data/all_disease_elements.json.gz b/data/all_disease_elements.json.gz new file mode 100644 index 0000000..79a1836 Binary files /dev/null and b/data/all_disease_elements.json.gz differ diff --git a/data/all_dissociation_elements.json.gz b/data/all_dissociation_elements.json.gz new file mode 100644 index 0000000..106dd28 Binary files /dev/null and b/data/all_dissociation_elements.json.gz differ diff --git a/data/all_enrichment_elements.json.gz b/data/all_enrichment_elements.json.gz new file mode 100644 index 0000000..da3f893 Binary files /dev/null and b/data/all_enrichment_elements.json.gz differ diff --git a/data/all_eukaryota_elements.json.gz 
b/data/all_eukaryota_elements.json.gz new file mode 100644 index 0000000..c301883 Binary files /dev/null and b/data/all_eukaryota_elements.json.gz differ diff --git a/data/all_fractionation_method_elements.json.gz b/data/all_fractionation_method_elements.json.gz new file mode 100644 index 0000000..2356bb8 Binary files /dev/null and b/data/all_fractionation_method_elements.json.gz differ diff --git a/data/all_instrument_elements.json.gz b/data/all_instrument_elements.json.gz new file mode 100644 index 0000000..eaed106 Binary files /dev/null and b/data/all_instrument_elements.json.gz differ diff --git a/data/all_label_elements.json.gz b/data/all_label_elements.json.gz new file mode 100644 index 0000000..d1957b4 Binary files /dev/null and b/data/all_label_elements.json.gz differ diff --git a/data/all_organism_elements.json.gz b/data/all_organism_elements.json.gz new file mode 100644 index 0000000..f3c1587 Binary files /dev/null and b/data/all_organism_elements.json.gz differ diff --git a/data/all_organism_part_dict_elements.json.gz b/data/all_organism_part_dict_elements.json.gz new file mode 100644 index 0000000..ac6e9f6 Binary files /dev/null and b/data/all_organism_part_dict_elements.json.gz differ diff --git a/data/all_organism_part_elements.json.gz b/data/all_organism_part_elements.json.gz new file mode 100644 index 0000000..b52f16b Binary files /dev/null and b/data/all_organism_part_elements.json.gz differ diff --git a/data/all_orgpart_elements.json.gz b/data/all_orgpart_elements.json.gz new file mode 100644 index 0000000..6229cbb Binary files /dev/null and b/data/all_orgpart_elements.json.gz differ diff --git a/data/all_other_sequences_elements.json.gz b/data/all_other_sequences_elements.json.gz new file mode 100644 index 0000000..d5a0862 Binary files /dev/null and b/data/all_other_sequences_elements.json.gz differ diff --git a/data/all_reduction_reagent_elements.json.gz b/data/all_reduction_reagent_elements.json.gz new file mode 100644 index 0000000..1009385 
Binary files /dev/null and b/data/all_reduction_reagent_elements.json.gz differ diff --git a/data/all_unclassified_elements.json.gz b/data/all_unclassified_elements.json.gz new file mode 100644 index 0000000..dac61c3 Binary files /dev/null and b/data/all_unclassified_elements.json.gz differ diff --git a/data/all_virus_elements.json.gz b/data/all_virus_elements.json.gz new file mode 100644 index 0000000..005f286 Binary files /dev/null and b/data/all_virus_elements.json.gz differ diff --git a/data/ancestry_category_dict.json.gz b/data/ancestry_category_dict.json.gz new file mode 100644 index 0000000..d80d560 Binary files /dev/null and b/data/ancestry_category_dict.json.gz differ diff --git a/data/ancestry_category_nodes.json.gz b/data/ancestry_category_nodes.json.gz new file mode 100644 index 0000000..abe9dff Binary files /dev/null and b/data/ancestry_category_nodes.json.gz differ diff --git a/data/autocomplete_inputs.json.gz b/data/autocomplete_inputs.json.gz new file mode 100644 index 0000000..83c4f73 Binary files /dev/null and b/data/autocomplete_inputs.json.gz differ diff --git a/data/cell_dict.json.gz b/data/cell_dict.json.gz new file mode 100644 index 0000000..d2ebe96 Binary files /dev/null and b/data/cell_dict.json.gz differ diff --git a/data/cell_line_dict.json.gz b/data/cell_line_dict.json.gz new file mode 100644 index 0000000..5f2fa06 Binary files /dev/null and b/data/cell_line_dict.json.gz differ diff --git a/data/cell_line_nodes.json.gz b/data/cell_line_nodes.json.gz new file mode 100644 index 0000000..19509d2 Binary files /dev/null and b/data/cell_line_nodes.json.gz differ diff --git a/data/cell_nodes.json.gz b/data/cell_nodes.json.gz new file mode 100644 index 0000000..6ea18ca Binary files /dev/null and b/data/cell_nodes.json.gz differ diff --git a/data/cell_type_dict.json.gz b/data/cell_type_dict.json.gz new file mode 100644 index 0000000..96e1d45 Binary files /dev/null and b/data/cell_type_dict.json.gz differ diff --git a/data/cell_type_nodes.json.gz 
b/data/cell_type_nodes.json.gz new file mode 100644 index 0000000..46fa369 Binary files /dev/null and b/data/cell_type_nodes.json.gz differ diff --git a/data/cleavage_agent_dict.json.gz b/data/cleavage_agent_dict.json.gz new file mode 100644 index 0000000..8dbd06c Binary files /dev/null and b/data/cleavage_agent_dict.json.gz differ diff --git a/data/cleavage_agent_nodes.json.gz b/data/cleavage_agent_nodes.json.gz new file mode 100644 index 0000000..fa4da53 Binary files /dev/null and b/data/cleavage_agent_nodes.json.gz differ diff --git a/data/developmental_stage_dict.json.gz b/data/developmental_stage_dict.json.gz new file mode 100644 index 0000000..8cea68a Binary files /dev/null and b/data/developmental_stage_dict.json.gz differ diff --git a/data/developmental_stage_nodes.json.gz b/data/developmental_stage_nodes.json.gz new file mode 100644 index 0000000..d402428 Binary files /dev/null and b/data/developmental_stage_nodes.json.gz differ diff --git a/data/disease_dict.json.gz b/data/disease_dict.json.gz new file mode 100644 index 0000000..892b0ce Binary files /dev/null and b/data/disease_dict.json.gz differ diff --git a/data/disease_nodes.json.gz b/data/disease_nodes.json.gz new file mode 100644 index 0000000..f25f0f6 Binary files /dev/null and b/data/disease_nodes.json.gz differ diff --git a/data/dissociation_dict.json.gz b/data/dissociation_dict.json.gz new file mode 100644 index 0000000..bb5c065 Binary files /dev/null and b/data/dissociation_dict.json.gz differ diff --git a/data/dissociation_nodes.json.gz b/data/dissociation_nodes.json.gz new file mode 100644 index 0000000..49fd45c Binary files /dev/null and b/data/dissociation_nodes.json.gz differ diff --git a/data/enrichment_dict.json.gz b/data/enrichment_dict.json.gz new file mode 100644 index 0000000..1b9490c Binary files /dev/null and b/data/enrichment_dict.json.gz differ diff --git a/data/enrichment_nodes.json.gz b/data/enrichment_nodes.json.gz new file mode 100644 index 0000000..f98a10f Binary files 
/dev/null and b/data/enrichment_nodes.json.gz differ diff --git a/data/fractionation_dict.json.gz b/data/fractionation_dict.json.gz new file mode 100644 index 0000000..72c14a3 Binary files /dev/null and b/data/fractionation_dict.json.gz differ diff --git a/data/fractionation_nodes.json.gz b/data/fractionation_nodes.json.gz new file mode 100644 index 0000000..8ea7017 Binary files /dev/null and b/data/fractionation_nodes.json.gz differ diff --git a/data/instrument_dict.json.gz b/data/instrument_dict.json.gz new file mode 100644 index 0000000..f717d49 Binary files /dev/null and b/data/instrument_dict.json.gz differ diff --git a/data/instrument_nodes.json.gz b/data/instrument_nodes.json.gz new file mode 100644 index 0000000..8e98586 Binary files /dev/null and b/data/instrument_nodes.json.gz differ diff --git a/data/label_dict.json.gz b/data/label_dict.json.gz new file mode 100644 index 0000000..7eb98a1 Binary files /dev/null and b/data/label_dict.json.gz differ diff --git a/data/label_nodes.json.gz b/data/label_nodes.json.gz new file mode 100644 index 0000000..f208f1a Binary files /dev/null and b/data/label_nodes.json.gz differ diff --git a/data/organism_dict.json.gz b/data/organism_dict.json.gz new file mode 100644 index 0000000..c7b9a3d Binary files /dev/null and b/data/organism_dict.json.gz differ diff --git a/data/organism_nodes.json.gz b/data/organism_nodes.json.gz new file mode 100644 index 0000000..2d28d54 Binary files /dev/null and b/data/organism_nodes.json.gz differ diff --git a/data/organism_part_dict.json.gz b/data/organism_part_dict.json.gz new file mode 100644 index 0000000..76e1e63 Binary files /dev/null and b/data/organism_part_dict.json.gz differ diff --git a/data/organism_part_nodes.json.gz b/data/organism_part_nodes.json.gz new file mode 100644 index 0000000..478b896 Binary files /dev/null and b/data/organism_part_nodes.json.gz differ diff --git a/data/reduction_dict.json.gz b/data/reduction_dict.json.gz new file mode 100644 index 0000000..8a5fdfd 
"""Gzip the raw ontology downloads so the compressed copies can be committed."""
import gzip
import shutil

# Ontology files (relative to the current working directory) that get a ".gz" sibling.
ONTOLOGY_FILES = [
    'efo.json',
    'hancestro.owl',
    'ms.owl',
    'pride_cv_updated.obo',
    'pride_cv.obo',
    'psi-ms.obo',
    'unimod.csv',
]


def compress_file(input_filename, output_filename=None):
    """Write a gzip-compressed copy of *input_filename*.

    Args:
        input_filename: Path of the file to compress.
        output_filename: Destination path; defaults to ``input_filename + ".gz"``.

    Returns:
        The path of the compressed file.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    if output_filename is None:
        output_filename = input_filename + ".gz"

    with open(input_filename, 'rb') as f_in, gzip.open(output_filename, 'wb') as f_out:
        # Copy in fixed-size chunks. Iterating the binary handle "line by line"
        # would pull a single-line file (e.g. minified JSON) into memory at once.
        shutil.copyfileobj(f_in, f_out)

    return output_filename


def main():
    """Compress every file listed in ONTOLOGY_FILES and report each result."""
    for input_filename in ONTOLOGY_FILES:
        output_filename = compress_file(input_filename)
        print(f'Compressed {input_filename} to {output_filename}')


if __name__ == "__main__":
    main()
"""Streamlit page 1: map columns of a local metadata sheet onto SDRF columns."""
import streamlit as st
import pandas as pd
import numpy as np
import re
import ParsingModule
import warnings
warnings.filterwarnings("ignore")

# Alternative spellings of frequently used organisms, keyed by the spelling that
# the values are normalised to before they are compared with the ontology terms.
ORGANISM_SYNONYMS = {
    'Homo sapiens': ['Human', 'human', 'homo sapiens', 'Homo Sapiens'],
    'Mus musculus': ['mouse', 'Mouse', 'Mus Musculus', 'mus musculus'],
    'Arabidopsis thaliana': ['arabidopsis thaliana', 'Arabidopsis Thaliana', 'arabidopsis', 'Arabidopsis', 'thale cress'],
    'Drosophila melanogaster': ['drosophila', 'Drosophila', 'Drosophila Melanogsaster', 'Drosophila Melanogaster', 'drosophila melanogaster', 'fruitfly', 'fruit fly'],
    'Saccharomyces cerevisiae': ['Saccharomyces Cerevisiae', 'saccharomyces cerevisiae', "brewer's yeast", "Brewer's yeast"],
    'Caenorhabditis elegans': ['C. Elegans', 'C. elegans', 'c. elegans', 'caenorhabditis elegans', 'Caenorhabditis Elegans', 'worm', 'Worm'],
    'Danio rerio': ['Danio Rerio', 'danio rerio', 'zebrafish', 'Zebrafish'],
    'Escherichia coli': ['E. Coli', 'E. coli', 'e. coli', 'Escherichia Coli', 'escherichia coli'],
}
# Flat synonym -> canonical name table, so each value needs a single lookup.
ORGANISM_LOOKUP = {
    synonym: canonical
    for canonical, synonyms in ORGANISM_SYNONYMS.items()
    for synonym in synonyms
}

# SDRF columns whose data_dict key differs from the "all_<column>_elements"
# pattern; these are the key names data_dict uses for those columns.
ELEMENT_KEY_OVERRIDES = {
    "characteristics[enrichment process]": "all_enrichment_elements",
    "comment[cleavage agent details]": "all_cleavage_agent_elements",
    "comment[dissociation method]": "all_dissociation_elements",
}

st.set_page_config(
    page_title="SDRF annotation tool",
    layout="wide",
    page_icon="🧪",
    menu_items={
        "Get help": "https://github.com/TineClaeys/SDRF_GUI",
        "Report a bug": "https://github.com/TineClaeys/SDRF_GUI",
    },
)

st.title("1. Map local metadata to SDRF")
st.markdown(
    """If you have a local metadata file available, you can use this file to map the data to the required SDRF information. """
)
st.markdown(
    """**Important:** you can upload the file in csv, tsv or xlsx format. 
The order of your raw file names should match the order in which you inputted them in the previous step"""
)

# Nothing below works without the template and the ontology data in the session
# (presumably both are filled on the Home page), so stop before touching them.
if "template_df" not in st.session_state or "data_dict" not in st.session_state:
    st.error("Please fill in the template file in the Home page first", icon="🚨")
    st.stop()

data_dict = st.session_state["data_dict"]
template_df = st.session_state["template_df"]
st.write("**This is your current SDRF file.**")
st.write(template_df)

with st.sidebar:
    download = st.download_button("Press to download SDRF file",ParsingModule.convert_df(template_df), "intermediate_SDRF.sdrf.tsv", help="download your SDRF file")

# Ask the user to upload their own metadata file and to map it to the columns of the template file
metadata_sheet = st.file_uploader(
    "Upload your local metadata file (.csv, .tsv or .xlsx)", type=["csv", "tsv", "xlsx"]
)
if metadata_sheet is not None:
    # Lower-case the extension so "DATA.CSV" is read like "data.csv"
    file_extension = metadata_sheet.name.split(".")[-1].lower()
    if file_extension == "csv":
        metadata_df = pd.read_csv(metadata_sheet)
    elif file_extension == "tsv":
        metadata_df = pd.read_csv(metadata_sheet, sep="\t")
    elif file_extension == "xlsx":
        metadata_df = pd.read_excel(metadata_sheet)
    else:
        # Without this branch metadata_df would be undefined further down
        st.error(f"Unsupported file type: .{file_extension}", icon="🚨")
        st.stop()
    st.write("Your metadata file:")
    st.dataframe(metadata_df)
    if "metadata_df" not in st.session_state:
        st.session_state["metadata_df"] = metadata_df
    # Check for potential mismatch in number of samples
    if metadata_df.shape[0] != template_df.shape[0]:
        st.error(
            "There is a mismatch in the number of uploaded files and the number of files in the metadata sheet",
            icon="🚨",
        )

    meta_columns = list(metadata_df.columns)
    template_columns = [
        "source name", "assay name", "technology type", "characteristics[age]",
        "characteristics[ancestry category]",
        "characteristics[biological replicate]",
        "characteristics[cell line]",
        "characteristics[cell type]",
        "characteristics[developmental stage]",
        "characteristics[disease]",
        "characteristics[individual]",
        "characteristics[organism part]",
        "characteristics[organism]",
        "characteristics[sex]",
        "characteristics[enrichment process]",
        "characteristics[compound]",
        "characteristics[concentration of compound]",
        "comment[modification parameters]",
        "comment[cleavage agent details]",
        "comment[data file]",
        "comment[fraction identifier]",
        "comment[fractionation method]",
        "comment[instrument]",
        "comment[label]",
        "comment[technical replicate]",
        "comment[fragment mass tolerance]",
        "comment[precursor mass tolerance]",
        "comment[dissociation method]",
        "characteristics[spiked compound]",
        "characteristics[synthetic peptide]",
        "characteristics[phenotype]",
        "comment[depletion]",
    ]
    # Free-text columns: copied over as-is, no ontology check
    value_columns = ["source name", "assay name", "comment[data file]", "comment[fraction identifier]", "comment[technical replicate]"]

    # First narrow down the columns in the metadata file that are useful to match to the SDRF file
    sel, subm = st.columns(2)
    with sel:
        columns_to_match = st.multiselect(
            "Select columns containing data that will be used in the SDRF data",
            meta_columns,
        )
    with subm:
        submitbox = st.checkbox('Ready to match?')
    mismatches = []
    if submitbox:
        col1, col2, col3, col4 = st.columns(4)

        # One row of widgets per column the user wants to map
        for i in range(len(columns_to_match)):
            with col1:
                selected_col = st.selectbox(
                    f"Select column {i+1} to match in your metadata file:",
                    [""] + columns_to_match,
                    index=0,
                    key=f"selected_col{i}",
                )
            if not selected_col:
                # Nothing chosen yet for this row
                continue

            with col2:
                matched_col = st.selectbox(
                    f"Select the corresponding column from the SDRF file:",
                    ["", None] + template_columns,
                    index=0,
                    key=f"matched_col{i}",
                )
            with col3:
                check = st.checkbox("Match and check ontology", key=f"check{i}")
                st.write(" ")
                st.write(" ")
                st.write(" ")

            if matched_col is None:
                with col4:
                    st.write("Skip this column")
            elif matched_col and check:
                # pd.notna also drops NaN objects that are not the np.nan singleton
                input_values = [v for v in metadata_df[selected_col].unique() if pd.notna(v)]
                default_key = 'all_' + (matched_col.split('[')[-1].split(']')[0]).replace(' ', '_') + '_elements'
                name = ELEMENT_KEY_OVERRIDES.get(matched_col, default_key)

                if matched_col in value_columns:
                    with col4:
                        st.success('Great! The local metadata values are valid terms and are mapped to the SDRF file.', icon="✅")
                    template_df[matched_col] = metadata_df[selected_col]
                elif name not in data_dict:
                    # No ontology for this column: report it instead of looking the key up
                    with col4:
                        st.error("This column does not contain ontology terms. Please fill it in using the next steps in the sidebar")
                else:
                    onto_elements = data_dict[name]
                    if matched_col == "characteristics[organism]":
                        # Normalise common spellings in the sheet and in the values being checked
                        metadata_df[selected_col] = metadata_df[selected_col].replace(ORGANISM_LOOKUP)
                        input_values = [ORGANISM_LOOKUP.get(v, v) for v in input_values]

                    not_in_onto = set(input_values) - set(onto_elements)
                    if not_in_onto:
                        mismatches.append(not_in_onto)
                        with col4:
                            st.error(f'{not_in_onto} are not ontology terms. Select the correct terms in the next steps directly from the ontology', icon="❌")
                    elif input_values:
                        with col4:
                            st.success('Great! The local metadata values are valid terms and are mapped to the SDRF file.', icon="✅")
                        template_df[matched_col] = metadata_df[selected_col]

            # A column that has been handled is no longer offered in the following rows
            columns_to_match = [col for col in columns_to_match if col != selected_col]
            template_columns = [col for col in template_columns if col != matched_col]
"""Streamlit page 2: attach label information to the raw files of the SDRF table."""
import streamlit as st
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import re
import ParsingModule
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode, DataReturnMode
from streamlit_tree_select import tree_select
from collections import defaultdict

st.set_page_config(
    page_title="SDRF annotation tool",
    layout="wide",
    page_icon="🧪",
    menu_items={
        "Get help": "https://github.com/TineClaeys/SDRF_GUI",
        "Report a bug": "https://github.com/TineClaeys/SDRF_GUI",
    },
)
st.title("2. Labelling")
st.markdown(
    """If a raw file contains multiple labels, every label will need to be annotated on a different row. Here you can map label information to your raw files.
    As a result, the raw file information will be duplicated with the correct label filled in"""
)

# Get filled in template_df from other page.
# Nothing below works without the template and the ontology data in the session,
# so stop before touching either of them.
if "template_df" not in st.session_state or "data_dict" not in st.session_state:
    st.error("Please fill in the template file in the Home page first", icon="🚨")
    st.stop()

data_dict = st.session_state["data_dict"]
template_df = st.session_state["template_df"]
st.write("**This is your current SDRF file.**")
st.write(template_df)

if "all_selected_labels" not in st.session_state:
    st.session_state["all_selected_labels"] = []

with st.sidebar:
    download = st.download_button("Press to download SDRF file",ParsingModule.convert_df(template_df), "intermediate_SDRF.sdrf.tsv", help="download your SDRF file")

# first select the labels
st.write("Input the label that was used in your experiment. If no label was added, indicate this using *label free sample*.")
label_nodes = data_dict["label_nodes"]
# Build the options as a new, ordered list: appending to the list stored in
# data_dict would add another " " entry to the shared data on every rerun.
label_options = sorted(set(data_dict["all_label_elements"]) | {" "}, key=str)
with st.form("Select here your ontology terms using the autocomplete function or the ontology-based tree menu", clear_on_submit=True):
    col1, col2 = st.columns(2)
    with col1:
        # selectbox with search option
        return_search = st.multiselect(
            "Select your matching ontology term using this autocomplete function",
            label_options)
    with col2:
        st.write("Or follow the ontology based drop down menu below")
        return_select = tree_select(label_nodes, no_cascade=True, expand_on_click=True, check_model="leaf")
    picked_labels = [i.split(',')[-1] for i in return_search + return_select["checked"] if i is not None]
    # A label chosen in both widgets must appear once, otherwise two widgets
    # below would get the same key.
    picked_labels = list(dict.fromkeys(picked_labels))
    s = st.form_submit_button("Submit selection")
    if s:
        st.write(f"Selection contains: {picked_labels}")
        st.session_state["all_selected_labels"] = picked_labels

# The form is cleared on submit, so the submitted labels have to be read back from
# the session state; the form-local selection is empty again on the next rerun.
all_selected_labels = st.session_state["all_selected_labels"]

# match filenames to labels
st.write("Match the filenames to the labels you selected above. Select ALL if the label is found in all raw files.")
label_dict = defaultdict(list)
file_options = ["ALL"] + template_df["comment[data file]"].values.tolist()
for label in all_selected_labels:
    selected_files = st.multiselect(f"Select files labeled with {label}.", file_options, key=f"selected_label_{label}")
    for filename in selected_files:
        label_dict[filename].append(label)
ready = st.checkbox('Ready?')

# label_dict maps a data file name (or "ALL") to the labels chosen for it.
# Every matching row is repeated once per label, with that label written to the
# comment[label] column; "ALL" repeats every row of the table.
if ready:
    if not label_dict:
        # pd.concat raises on an empty list, so ask for input instead
        st.error("Select at least one file for one of the labels first", icon="🚨")
        st.stop()

    new_rows = []
    for filename, label_list in label_dict.items():
        if filename == "ALL":
            rows_to_add = template_df
        else:
            rows_to_add = template_df[template_df["comment[data file]"] == filename]
        for label in label_list:
            labelled_rows = rows_to_add.copy()
            labelled_rows["comment[label]"] = label
            new_rows.append(labelled_rows)

    template_df = pd.concat(new_rows, ignore_index=True)
    # The labelled table is stored back into the session, so a rerun with "Ready?"
    # still ticked starts from rows that were already expanded. Dropping exact
    # duplicates keeps that rerun from multiplying the rows.
    template_df = template_df.drop_duplicates(ignore_index=True)
    template_df = template_df.sort_values(by='comment[data file]')
    st.write("SDRF file with label information")
    st.dataframe(template_df)
    st.session_state["template_df"] = template_df
import re
import warnings
warnings.filterwarnings("ignore")
import streamlit as st
import pandas as pd
import numpy as np
import json
import gzip
import ParsingModule
import os
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode, DataReturnMode
from streamlit_tree_select import tree_select

# Age written as years, months and days, e.g. "12Y 3M 4D"
_AGE_PATTERN = re.compile(r"^\d+Y\s\d+M\s\d+D$")
# Cell contents that count as "no age given" and are not validated
_AGE_PLACEHOLDERS = ("", "empty", "None", "Not available")
# Name fragments of the data files that hold organism terms
_ORGANISM_FILE_PATTERN = re.compile(r"archaea|bacteria|eukaryota|virus|unclassified|other_sequences")


def update_session_state(df):
    """Store *df* as the current SDRF table in the Streamlit session state."""
    st.session_state["template_df"] = df


def check_age_format(df, column):
    """Check that every filled-in cell of *column* uses the Y M D age format.

    Cells that are missing (NaN/None) or hold one of the placeholder texts
    ("", "empty", "None", "Not available") are skipped.

    Args:
        df: Table holding the column to validate.
        column: Name of the column to validate.

    Returns:
        False as soon as one cell does not look like "12Y 3M 4D", True otherwise.
    """
    for value in df[column]:
        # A missing cell is an empty cell; matching a regex against NaN would raise
        if pd.isna(value) or value in _AGE_PLACEHOLDERS:
            continue
        if not _AGE_PATTERN.match(str(value)):
            return False
    return True


@st.cache_data
def load_organism_data():
    """Load the gzipped organism JSON files from the "data" folder.

    The folder is looked up one directory above the directory of this file. Only
    files whose name mentions archaea, bacteria, eukaryota, virus, unclassified
    or other_sequences are considered.

    Returns:
        A dict mapping each file name without its ".json.gz" suffix to the
        decoded JSON content of that file.
    """
    # find dir one up from current dir
    data_dir = os.path.dirname(os.path.dirname(__file__))
    folder_path = os.path.join(data_dir, "data")
    data = {}
    for filename in os.listdir(folder_path):
        if not _ORGANISM_FILE_PATTERN.search(filename):
            continue
        file_path = os.path.join(folder_path, filename)
        if not filename.endswith(".json.gz"):
            st.write(f"Skipping file {file_path}: not a gzipped file")
            continue
        try:
            with gzip.open(file_path, "rb") as f:
                file_data = json.load(f)
        except gzip.BadGzipFile:
            st.write(f"Error reading file {file_path}: not a gzipped file")
            continue
        data[filename.replace(".json.gz", "")] = file_data
    return data


def organism_selection(species):
    """Show a search box for *species* and return the outcome of the search.

    Reads the term list "all_<species>_elements" from the module-level
    ``organism_data`` and passes it, together with the typed search term, to
    ``ParsingModule.autocomplete_species_search``.

    Returns:
        Whatever the search helper returns; None when it returns nothing.
    """
    lem = organism_data[f"all_{species}_elements"]
    search_term = st.text_input(f"Search for an {species} species here", "")
    return ParsingModule.autocomplete_species_search(lem, search_term)


# Define the default button color (you can
adjust this as desired) +default_color = "#ff4b4b" + +# Define the button CSS styles +button_styles = f""" + background-color: white; + color: {default_color}; + border-radius: 20px; + padding: 10px 20px; + border: 1px solid {default_color}; + text-align: center; + text-decoration: none; + display: inline-block; + font-size: 16px; + margin: 4px 2px; + cursor: pointer; +""" + + + +st.set_page_config( + page_title="SDRF annotation tool", + layout="wide", + page_icon="๐Ÿงช", + menu_items={ + "Get help": "https://github.com/TineClaeys/SDRF_GUI", + "Report a bug": "https://github.com/TineClaeys/SDRF_GUI", + }, +) +st.title("""3. Required columns""") +# if template_df is not in the session state, don't run all the code below +if "template_df" not in st.session_state: + st.error("Please fill in the template file in the Home page first", icon="๐Ÿšจ") + st.stop() +else: + template_df = st.session_state["template_df"] + with st.container(): + st.write("**This is your current SDRF file.**") + st.dataframe(template_df) + +data_dict = st.session_state["data_dict"] +empty_columns = [col for col in template_df.columns if template_df[col].isna().all()] +#remove columns from the list that have an underscore in the name(these are added because of multiple in one samples) +empty_columns = [col for col in empty_columns if "_" not in col] +empty_columns.append('undo column') +empty_columns.insert(0, 'start') + + + + +with st.sidebar: + selection = st.radio( + "The following columns are empty and required", + empty_columns, help="If a column you're looking for is not in this display, This means it is not empty. Click on the 'undo column' button and empty your column of interest." + ) + download = st.download_button("Press to download SDRF file",ParsingModule.convert_df(template_df), "intermediate_SDRF.sdrf.tsv", help="download your SDRF file") + + +if selection == 'start': + st.write("""In the sidebar, all the empty columns that are required for you SDRF file are listed. 
Select the one you want to annotate first. + When a column is filled in, it will disappear from the sidebar so you can keep track on which columns still require input. + When you want to reannotate a previously filled in column, you can do so by clicking on the **undo column** button in the sidebar. + """) + st.write("""Every page will start with your current SDRF file and at the bottom of the page there is a button to download the intermediate SDRF file""") + + + + + +if selection == "source name": + st.write( + """The source name is the unique sample name (it can be present multiple times if the same sample is used several times in the same dataset) eg. healthy_patient_1, diseased_patient_1 + If you did not add it using the previous mapping function, you can input it here manually.") + """ + ) + st.write( + "Type the correct source names in the corresponding column and **double click** *Update* when finished" + ) + template_df = ParsingModule.fill_in_from_list(template_df, "source name") + st.session_state["template_df"] = template_df + + +if selection == "assay name": + st.write( + """The assay name is a name for the run file, this has to be a uniqe value eg. run 1 - run 2 - run 3_fraction1. 
If you did not add it using the previous mapping function, you can input it here + manually.""" + ) + st.write( + "Type the correct assay names in the corresponding column and click Update twice when finished" + ) + template_df = ParsingModule.fill_in_from_list(template_df, "assay name") + st.session_state["template_df"] = template_df + +if selection == "technology type": + with st.form("Choose the technology type in your sample"): + tech_type = st.radio( + "Choose the technology type from these options:", + ( + "proteomic profiling by mass spectrometry", + "metabolomics profiling by mass spectrometry", + "other", + ), + ) + if st.form_submit_button("Submit"): + template_df["technology type"] = tech_type + st.session_state["template_df"] = template_df + +if selection == "characteristics[age]": + st.subheader("Input the ages of your samples using the Years Months Days format, e.g. 1Y 2M 3D") + multiple = st.selectbox(f"Are there multiple ages in your data?", ("","No", "Yes", "Not available"), help="If you select Not available, the column will be filled in with 'Not available'") + if multiple == "Yes": + template_df = ParsingModule.fill_in_from_list(template_df, "characteristics[age]") + if check_age_format(template_df, "characteristics[age]") == False: + st.error("The age column is not in the correct format, please check and try again") + st.stop() + elif check_age_format(template_df, "characteristics[age]") == True: + st.success("The age column is in the correct format") + update_session_state(template_df) + st.experimental_rerun() + if multiple == "No": + age = st.text_input("Input the age of your sample in Y M D format e.g. 
12Y 3M 4D", help="As you only have one age, the inputted age will be immediatly used to fill all cells in the age column") + # check if the age is in Y M D format + if (age != "") and (not re.match(r"^\d+Y\s\d+M\s\d+D$", age)): + st.error("The age is not in the correct format, please check and try again",icon="๐Ÿšจ") + st.stop() + if (age != "") and (re.match(r"^\d+Y\s\d+M\s\d+D$", age)): + st.write("The age is in the correct format") + template_df["characteristics[age]"] = age + update_session_state(template_df) + st.experimental_rerun() # reruns script, makes it unnecessary to click button twice + if multiple == "Not available": + template_df["characteristics[age]"] = "Not available" + update_session_state(template_df) + st.experimental_rerun() + +if selection == "comment[alkylation reagent]": + st.subheader("Input the alkylation reagent that was used in your experiment") + all_alkylation_elements = data_dict["all_alkylation_elements"] + alkylation_nodes = data_dict["alkylation_nodes"] + df = ParsingModule.multiple_ontology_tree(selection, all_alkylation_elements, alkylation_nodes, template_df, multiple_in_one=True) + update_session_state(df) + +if selection == "characteristics[ancestry category]": + st.subheader("Input the ancestry of your samples") + all_ancestry_elements = data_dict["all_ancestry_category_elements"] + ancestry_nodes = data_dict["ancestry_category_nodes"] + df = ParsingModule.multiple_ontology_tree(selection, all_ancestry_elements, ancestry_nodes, template_df, multiple_in_one=False) + update_session_state(df) + + +if selection == "characteristics[cell type]": + st.subheader("Input the cell type of your sample") + all_cell_type = data_dict["all_cell_type_elements"] + cell_type_nodes = data_dict["cell_type_nodes"] + df = ParsingModule.multiple_ontology_tree(selection, all_cell_type, cell_type_nodes, template_df, multiple_in_one=True) + update_session_state(df) + +if selection == "characteristics[cell line]": + st.subheader("Input the cell line of 
your sample if one was used") + all_celltype = data_dict["all_cell_type_elements"] + celltype_nodes = data_dict["cell_type_nodes"] + df = ParsingModule.multiple_ontology_tree(selection, all_celltype, celltype_nodes, template_df,multiple_in_one=True) + update_session_state(df) + +if selection == "comment[cleavage agent details]": + st.subheader("Select the cleavage agent present in your sample") + all_cleavage_agent_elements = data_dict["all_cleavage_agent_elements"] + cleavage_agent_nodes = data_dict["cleavage_agent_nodes"] + df = ParsingModule.multiple_ontology_tree(selection, all_cleavage_agent_elements, cleavage_agent_nodes, template_df, multiple_in_one=True) + update_session_state(df) + +if selection == "characteristics[compound]": + st.subheader("If a compound was added to your sample, input the name here") + compounds = [] + input_compounds = st.text_input("Input compound names as comma separated list") + if input_compounds is not None: + input_compounds = re.sub(" ", "", input_compounds) + input_compounds = input_compounds.split(",") + compounds.append(input_compounds) + compounds = compounds[0] + st.write(compounds) + if compounds != [""]: + template_df = ParsingModule.fill_in_from_list(template_df, selection, compounds) + st.session_state["template_df"] = template_df + +if selection == "characteristics[concentration of compound]": + st.subheader("Input the concentration with which the compound was added to your sample") + concentration = [] + input_concentration = st.text_input( + "Input compound concentration as comma separated list, don't forget to indicate the unit" + ) + if input_concentration is not None: + input_concentration = re.sub(" ", "", input_concentration) + input_concentration = input_concentration.split(",") + concentration.append(input_concentration) + concentration = concentration[0] + st.write(concentration) + if concentration != [""]: + template_df["characteristics[concentration of compound]"] = "" + template_df = 
ParsingModule.fill_in_from_list( + template_df, selection, concentration + ) + st.session_state["template_df"] = template_df + + +if selection == "comment[collision energy]": + st.subheader("Input the collision energy that was used in your experiment") + multiple = st.radio( + f"Are there multiple collision energies in your data?", ("No", "Yes") + ) + if multiple == "Yes": + st.write( + "Input the collision energy directly in the SDRF file. Don't forget to indicate the unit" + ) + df = ParsingModule.fill_in_from_list(template_df, selection) + update_session_state(df) + else: + coll_en = st.text_input("Input the collision energy and its unit") + template_df[selection] = coll_en + st.session_state["template_df"] = template_df + +if selection == "characteristics[developmental stage]": + st.subheader("Select the developmental stage of your sample") + all_devstage = data_dict["all_developmental_stage_elements"] + devstage_nodes = data_dict["developmental_stage_nodes"] + df = ParsingModule.multiple_ontology_tree( + selection, all_devstage, all_devstage, template_df + ) + update_session_state(df) + +if selection == "characteristics[disease]": + st.subheader("If you have healthy and control samples, indicate healthy samples using *normal*. 
# Ontology-backed annotation of the dissociation method column.
if selection == "comment[dissociation method]":
    st.subheader("Input the dissociation method that was used in your experiment")
    all_dissociation_elements = data_dict["all_dissociation_elements"]
    dissociation_nodes = data_dict["dissociation_nodes"]
    # Every other call to multiple_ontology_tree spells this flag
    # `multiple_in_one`; this section was the only one passing `multiple=`.
    # NOTE(review): confirm against the ParsingModule signature.
    df = ParsingModule.multiple_ontology_tree(
        selection,
        all_dissociation_elements,
        dissociation_nodes,
        template_df,
        multiple_in_one=True,
    )
    update_session_state(df)
# Ontology-backed annotation of the instrument column.
if selection == "comment[instrument]":
    st.subheader("Input the instrument that was used in your experiment")
    instrument_terms = data_dict["all_instrument_elements"]
    instrument_tree = data_dict["instrument_nodes"]
    # Store whatever the ontology picker returns as the new template.
    update_session_state(
        ParsingModule.multiple_ontology_tree(
            selection, instrument_terms, instrument_tree, template_df
        )
    )
"Not available")) + if sel == "No": + template_df[selection] = 1 + st.session_state["template_df"] = template_df + st.experimental_rerun() + if sel == "Yes": + with col2: + number = st.number_input("How many indiviuals are in your data?", min_value=0, step=1) + with col3: + s = st.checkbox("Ready for input?") + if sel == "Yes" and s: + indiv = [*range(1, number + 1, 1)] + df = ParsingModule.fill_in_from_list(template_df, "characteristics[individual]", indiv) + update_session_state(df) + if sel == "Not available": + template_df[selection] = "Not available" + st.session_state["template_df"] = template_df + st.experimental_rerun() + + +if selection == "comment[label]": + st.subheader("Input the label that was used in your experiment. If no label was added, indicate this using *label free sample*.") + all_label_elements = data_dict["all_label_elements"] + label_nodes = data_dict["label_nodes"] + df = ParsingModule.multiple_ontology_tree( + selection, all_label_elements, label_nodes, template_df + ) + update_session_state(df) + +if selection == "characteristics[organism]": + if "selected_species" not in st.session_state: + st.session_state["selected_species"] = set() + st.subheader("Select the species that is present in your sample") + if selection not in template_df.columns: + template_df[selection] = np.nan + multiple_in_one = False + index = template_df.columns.get_loc(selection) + col1, col2, col3, col4 = st.columns(4) + columns_to_adapt = [selection] + with col1: + multiple = st.radio(f"Are there multiple organisms in your data?", ("No", "Yes")) + if multiple == "Yes": + with col2: + number = st.number_input( + f"How many different organisms are in your data?", + min_value=0, + step=1) + with col3: + multiple_in_one_sel = st.radio(f"Are there multiple organisms within one sample?", ("No", "Yes")) + if multiple_in_one_sel == "Yes": + multiple_in_one = True + for i in range(number-1): + # add column next to the original column if it is not already there + if 
f"{selection}_{i+1}" not in template_df.columns: + template_df.insert(index+1, f"{selection}_{i+1}", "empty") + columns_to_adapt.append(f"{selection}_{i+1}") + if multiple == "No": + number = 1 + + col6, col7 = st.columns(2) + with col6: + model = st.radio('Does your data contain only classical model organisms?', ('Yes', 'No'), + help="Classical model organism being: Homo sapiens, Mus musculus, Drosophila Melanogaster, Arabidopsis thaliana, Xenopus laevis, Xenopus tropicalis, Saccharomyces cerevisiae, Caenorhabditis elegans, Danio rerio, Escherichia coli and Cavia porcellus") + classical_model_organisms = ["Homo sapiens", + "Mus musculus", + "Drosophila Melanogaster", + "Arabidopsis thaliana", + "Xenopus laevis", "Xenopus tropicalis", + "Saccharomyces cerevisiae", + "Caenorhabditis elegans", + "Danio rerio", + "Escherichia coli", + "Cavia porcellus"] + if model == 'Yes': + with col7: + sel = st.multiselect('Select the classical model organism here', classical_model_organisms, max_selections=number) + if isinstance(sel, list): + for i in sel: + if i is not None: + st.session_state.selected_species.add(i) + else: + if sel is not None: + st.session_state.selected_species.add(sel) + if model == 'No': + st.write('The NCBITaxon ontology data is loaded. Please allow a few seconds for the data to be loaded. ') + organism_data = load_organism_data() + if organism_data: + st.success(f"*NCBITaxon organism data was loaded*", icon="โœ…") + else: + st.error("Failed loading data", icon="โŒ") + st.write("""First, select your species type using the tabs below. + Then you can fill in the name of your species and suggested ontology terms will appear, from which you can select the correct term or the perfectly matched term. 
+ If you want to consult the ontology tree structure, you can click the button below to the OLS search page.""") + url = "https://www.ebi.ac.uk/ols/ontologies/ncbitaxon" + button = f'OLS NCBITaxon ontology tree' + st.write(button, unsafe_allow_html=True) + + tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(['Eukaryota', 'Archaea', 'Bacteria', 'Viruses', 'Unclassified', 'Other']) + with tab1: + ret= organism_selection("eukaryota") + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + with tab2: + ret= organism_selection("archaea") + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + with tab3: + ret= organism_selection("bacteria") + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + + with tab4: + ret= organism_selection("virus") + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + + with tab5: + ret= organism_selection("unclassified") + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + + with tab6: + ret= organism_selection("other_sequences") + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + if st.session_state['selected_species'] != {None}: + st.write(f"You selected the following 
# Ontology-backed annotation of the organism part column.
if selection == "characteristics[organism part]":
    st.write("Select the part of the organism that is present in your sample")
    organism_part_terms = data_dict["all_organism_part_elements"]
    organism_part_tree = data_dict["organism_part_nodes"]
    annotated = ParsingModule.multiple_ontology_tree(
        selection,
        organism_part_terms,
        organism_part_tree,
        template_df,
        multiple_in_one=True,
    )
    update_session_state(annotated)
# Numbering of technical replicates: a single value of 1 when there are none,
# otherwise the ids 1..N are offered for assignment.
if selection == "comment[technical replicate]":
    answer_col, count_col, ready_col = st.columns(3)
    with answer_col:
        has_replicates = st.selectbox("Do you have technical replicates?", ("", "Yes", "No"))
    if has_replicates == "No":
        template_df[selection] = 1
        st.session_state["template_df"] = template_df
        st.experimental_rerun()
    elif has_replicates == "Yes":
        with count_col:
            replicate_count = st.number_input(
                "How many technical replicates are in your data?", min_value=0, step=1
            )
        with ready_col:
            ready = st.checkbox("Ready for input?")
        if ready:
            replicate_ids = list(range(1, replicate_count + 1))
            update_session_state(
                ParsingModule.fill_in_from_list(template_df, selection, replicate_ids)
            )
# Marks samples as synthetic / not synthetic.
# The branch used to compare against the misspelled key
# "characterics[synthetic peptide]", so the correctly spelled column could
# never reach it. The misspelling is still accepted so that an existing
# caller using it keeps working, but the column that gets filled is always
# the correctly spelled one.
if selection in (
    "characteristics[synthetic peptide]",
    "characterics[synthetic peptide]",
):
    st.subheader(
        "If the sample is a synthetic peptide library, indicate this by selecting *synthetic* or *not synthetic*"
    )
    df = ParsingModule.fill_in_from_list(
        template_df,
        "characteristics[synthetic peptide]",
        ["synthetic", "not synthetic"],
    )
    update_session_state(df)
+ Modification type can be fixed, variable or annotated. Annotated is used to search for all the occurrences of the modification into an annotated protein database file like UNIPROT XML or PEFF. + Position can be anywhere, protein N-term, protein C-term, any N-term or any C-term. + Target amino acid can be any amino acid or X if it's not in the list. Yoy can also select multiple amino acids.""") + st.write(""" If the modification of your choice is not available in the drop down list, select "Other" and input the modification name, chemical formula and mass of your custom modification.""") + st.write(""" The final modification will be formatted following the SDRF guidelines after which you can click on "Submit modifications".""") + unimod = data_dict["unimod_dict"] + inputs = sorted(list(unimod.keys())) + inputs.append("Other") + inputs.remove("#NAAM?") + mt = ["Fixed", "Variable", "Annotated"] + pp = ["Anywhere", "Protein N-term", "Protein C-term", "Any N-term", "Any C-term"] + ta = ["X","G","A","L","M","F","W","K","Q","E","S","P","V","I","C","Y","H","R","N","D","T"] + mods_sel = st.multiselect("Select the modifications present in your data", inputs) + sdrf_mods = [] + st.session_state["sdrf_mods"] = sdrf_mods + + for i in mods_sel: + st.write(f"**{i}**") + col1, col2, col3, col4 = st.columns(4) + + if i == "Other": + with col1: + name = st.text_input( + "Input a logical name for your custom modification" + ) + with col2: + form = st.text_input("Input the chemical formula of the modification") + with col3: + mass = st.text_input("Input the mass of the modification") + with col4: + final_str = f"NT={name};CF={form};MM={mass}" + st.write( + f""" **Final SDRF notation of modification:** + {final_str}""" + ) + done = st.button( + "Okay", + key=f"done_{i}", + help="Click to add the modification to the SDRF file", + on_click=st.session_state["sdrf_mods"].append(final_str), + ) + + else: + with col1: + mt_sel = st.selectbox("Select the modification type", mt, 
# "undo column": blank out chosen columns so they can be annotated again.
if selection == "undo column":
    st.write("""Here you can select a column that you want to reannotate. 
    Upon clicking the column the current values will be removed and you can reannotate the column. 
    """)
    picker_col, action_col = st.columns(2)
    with picker_col:
        columns_to_reset = st.multiselect(
            "Select the column(s) you want to reannotate", template_df.columns
        )
    with action_col:
        reset_requested = st.button("Reannotate")
        if reset_requested:
            # Overwrite the chosen columns with NaN, store the template, rerun.
            template_df[columns_to_reset] = np.nan
            st.session_state["template_df"] = template_df
            st.experimental_rerun()
def check_age_format(df, column):
    """Return True when every filled-in cell of *column* looks like ``1Y 2M 3D``.

    Cells that are not annotated yet are skipped: the placeholder strings
    ``""``, ``"empty"`` and ``"None"``, as well as missing values (NaN/None).
    Any other value must be a string matching ``<int>Y <int>M <int>D``.

    Args:
        df: DataFrame holding the SDRF template.
        column: Name of the age column to validate.

    Returns:
        False as soon as one filled-in cell has a different format (including
        non-string cells such as bare numbers), True otherwise.
    """
    # Compiled once instead of once per row.
    age_pattern = re.compile(r"^\d+Y\s\d+M\s\d+D$")
    placeholders = ("", "empty", "None")
    for value in df[column]:
        if not isinstance(value, str):
            # NaN/None means "not filled in"; passing it to the regex used to
            # raise TypeError. Any other non-string is a malformed age.
            if pd.isna(value):
                continue
            return False
        if value in placeholders:
            continue
        # The per-row st.write() of the value was leftover debug output and is
        # intentionally not reproduced here.
        if not age_pattern.match(value):
            return False
    return True
def update_sidebar(df):
    """Return the sorted column names that can still be offered in the sidebar.

    A column of *df* counts as used when at least one of its cells is not
    null. The result is every entry of the module-level
    ``all_possible_columns`` that is not used, in sorted order.
    """
    # Share of null cells per column; a share below 1 means something is filled in.
    null_share = df.isnull().mean()
    filled_columns = df.columns[null_share < 1]
    still_available = set(all_possible_columns).difference(filled_columns)
    return sorted(still_available)
# Single-choice form for the "technology type" column.
if selection == "technology type":
    technology_options = (
        "proteomic profiling by mass spectrometry",
        "metabolomics profiling by mass spectrometry",
        "other",
    )
    with st.form("Choose the technology type in your sample"):
        chosen_technology = st.radio(
            "Choose the technology type from these options:", technology_options
        )
        submitted = st.form_submit_button("Submit")
        if submitted:
            # The same value is written to every row of the template.
            template_df["technology type"] = chosen_technology
            st.session_state["template_df"] = template_df
# Ontology-backed annotation of the cell type column.
if selection == "characteristics[cell type]":
    st.subheader("Input the cell type of your sample")
    cell_type_terms = data_dict["all_cell_type_elements"]
    cell_type_tree = data_dict["cell_type_nodes"]
    update_session_state(
        ParsingModule.multiple_ontology_tree(
            selection, cell_type_terms, cell_type_tree, template_df
        )
    )
# Selection of the cleavage agent(s) from a flat list.
if selection == "comment[cleavage agent details]":
    st.subheader("Input the cleavage agent details of your sample")
    cleavage_list = data_dict["all_cleavage_list_elements"]
    enzymes = st.multiselect(
        "Select the cleavage agents used in your sample If no cleavage agent was used e.g. in top down proteomics, choose *no cleavage*",
        cleavage_list,
    )
    s = st.checkbox("Ready for input?")

    # Nothing is written until the user has confirmed AND picked something;
    # an empty selection used to be passed on to fill_in_from_list.
    if s and enzymes:
        if len(enzymes) == 1:
            # One agent applies to every row. Store the template before the
            # rerun, as the other single-value branches on this page do.
            template_df[selection] = enzymes[0]
            st.session_state["template_df"] = template_df
            st.experimental_rerun()
        else:
            df = ParsingModule.fill_in_from_list(template_df, selection, enzymes)
            update_session_state(df)
# Free-text entry of compound concentrations, optionally spread over several
# columns when one sample carries more than one concentration.
if selection == "characteristics[concentration of compound]":
    if selection not in template_df.columns:
        template_df[selection] = np.nan
    st.subheader("Input the concentration with which the compound was added to your sample")
    concentration = []
    col1, col2, col3 = st.columns(3)
    index = template_df.columns.get_loc(selection)
    with col1:
        input_concentrations = st.text_input("Input compound concentrations as comma separated list")
    with col2:
        multiple_in_one_sel = st.radio("Are there multiple concentrations within the same sample?", ("No", "Yes"))
    with col3:
        ready = st.checkbox('Ready?')
    if input_concentrations is not None:
        # Blanks are dropped before splitting on commas.
        concentration = input_concentrations.replace(" ", "").split(",")
        st.write(concentration)
    # An empty text box splits to [""], which is truthy; exclude it explicitly
    # so ticking "Ready?" without input does not write a blank value.
    if ready and concentration and concentration != [""]:
        if multiple_in_one_sel == "Yes":
            multiple_in_one = True
            for i in range(len(concentration) - 1):
                extra_column = f"{selection}_{i+1}"
                # Insert the same name that is checked for. The original
                # checked `_{i+1}` but inserted `_{i}`, which is inconsistent
                # with the compound section and raises on a later run once
                # `_{i}` already exists.
                if extra_column not in template_df.columns:
                    template_df.insert(index + 1, extra_column, "empty")
        else:
            multiple_in_one = False
        df = ParsingModule.fill_in_from_list(template_df, selection, concentration, multiple_in_one)
        update_session_state(df)
        st.experimental_rerun()
# Ontology-backed annotation of the developmental stage column.
if selection == "characteristics[developmental stage]":
    st.subheader("Input the developmental stage of your sample")
    devstage_terms = data_dict["all_developmental_stage_elements"]
    devstage_tree = data_dict["developmental_stage_nodes"]
    update_session_state(
        ParsingModule.multiple_ontology_tree(
            selection,
            devstage_terms,
            devstage_tree,
            template_df,
            multiple_in_one=False,
        )
    )
all_enrichment_elements, enrichment_nodes, template_df, multiple_in_one = False + ) + update_session_state(df) + +if selection == "comment[fraction identifier]": + # first ask if they have fractionation + # if they don't ==> fraction identifier = 1 + # if they do: add fraction identifiers + add fractionation method + number_of_fractions = None + col1, col2 = st.columns(2) + with col1: + multiple = st.selectbox( + f"Are there multiple fractions in your data?", ("", "No", "Yes") + ) + number_of_methods = None + if multiple == "Yes": + with col2: + number_of_fractions = st.number_input( + f"How many different fractions are in your data?", + min_value=0, + step=1, + ) + number_of_methods = st.number_input( + f"How many different fractionation methods are used?", + min_value=0, + step=1, + ) + if multiple == "No": + number_of_fractions = 1 + + if number_of_methods: + with st.form("Provide details on the fractionation method"): + # template_df["comment[ fractionation method]"] = "" + fractionation_elements = data_dict["all_fractionation_method_elements"] + fractionation_nodes = data_dict["fractionation_nodes"] + col3, col4 = st.columns(2) + with col3: + # selectbox with search option + fractionation_elements.append(" ") + fractionation_elements = set(fractionation_elements) + return_search = st.multiselect( + "Select your matching fractionation term using this autocomplete function", + fractionation_elements, + max_selections=number_of_methods, + ) + with col4: + st.write("Or follow the ontology based drop down menu below") + return_select = tree_select( + fractionation_nodes, + no_cascade=True, + expand_on_click=True, + check_model="leaf", + ) + + if (len(return_select["checked"]) > 1) & ( + len(return_select["checked"]) != number_of_methods + ): + st.error(f"You need to select a total of {number_of_methods}.") + all = return_search + return_select["checked"] + all = [i.split(",")[-1] for i in all if i is not None] + if (len(all) >= 1) & (len(all) != number_of_methods): 
+ st.error(f"You need to select a total of {number_of_methods}.") + x = st.form_submit_button("Submit selection") + if x: + st.write(f"Selection contains: {all}") + + if x: + st.write(x) + df = ParsingModule.fill_in_from_list(template_df, "comment[fractionation method]", all) + df = ParsingModule.fill_in_from_list(template_df, "comment[fraction identifier]", [*range(1, number_of_fractions + 1)]) + update_session_state(df) + +if selection == "comment[instrument]": + st.subheader("Input the instrument that was used in your experiment") + all_instrument_elements = data_dict["all_instrument_elements"] + instrument_nodes = data_dict["instrument_nodes"] + df = ParsingModule.multiple_ontology_tree( + selection, all_instrument_elements, instrument_nodes, template_df + ) + update_session_state(df) + +if selection == "characteristics[individual]": + col1, col2, col3 = st.columns(3) + with col1: + sel = st.selectbox("Do you have multiple indiviuals in your data?", ("", "No", "Yes")) + if sel == "No": + template_df[selection] = 1 + st.session_state["template_df"] = template_df + st.experimental_rerun() + if sel == "Yes": + with col2: + number = st.number_input("How many indiviuals are in your data?", min_value=0, step=1) + with col3: + s = st.checkbox("Ready for input?") + if sel == "Yes" and s: + indiv = [*range(1, number + 1, 1)] + df = ParsingModule.fill_in_from_list(template_df, "characteristics[individual]", indiv) + update_session_state(df) + + +if selection == "comment[label]": + st.subheader("Input the label that was used in your experiment") + all_label_elements = data_dict["all_label_elements"] + label_nodes = data_dict["label_nodes"] + df = ParsingModule.multiple_ontology_tree( + selection, all_label_elements, label_nodes, template_df + ) + update_session_state(df) + + +if selection == "characteristics[organism]": + if "selected_species" not in st.session_state: + st.session_state["selected_species"] = set() + st.subheader("Select the species that is present in your 
sample") + if selection not in template_df.columns: + template_df[selection] = np.nan + multiple_in_one = False + index = template_df.columns.get_loc(selection) + col1, col2, col3, col4 = st.columns(4) + columns_to_adapt = [selection] + with col1: + multiple = st.radio(f"Are there multiple organisms in your data?", ("No", "Yes")) + if multiple == "Yes": + with col2: + number = st.number_input( + f"How many different organisms are in your data?", + min_value=0, + step=1) + with col3: + multiple_in_one_sel = st.radio(f"Are there multiple organisms within one sample?", ("No", "Yes")) + if multiple_in_one_sel == "Yes": + multiple_in_one = True + for i in range(number-1): + # add a numbered sibling column next to the original one if it is not already there; insert into template_df, the same frame that is checked here and filled below + if f"{selection}_{i+1}" not in template_df.columns: + template_df.insert(index+1, f"{selection}_{i+1}", "empty") + columns_to_adapt.append(f"{selection}_{i+1}") + if multiple == "No": + number = 1 + + col4, col5 = st.columns(2) + with col4: + st.write('Select your species using the tabs below. 
If you want to consult the ontology tree structure, you can click the button to the OLS search page.') + with col5: + url = "https://www.ebi.ac.uk/ols/ontologies/ncbitaxon" + button = f'OLS NCBITaxon ontology tree' + st.write(button, unsafe_allow_html=True) + + tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(['Eukaryota', 'Archaea', 'Bacteria', 'Viruses', 'Unclassified', 'Other']) + with tab1: + eu_elem = data_dict["all_eukaryota_elements"] + search_term = st.text_input("Search for an eukaryote species here", "") + ret = ParsingModule.autocomplete_species_search(eu_elem, search_term) + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + + with tab2: + ar_elem = data_dict["all_archaea_elements"] + search_term = st.text_input("Search for an archaea species here", "") + ret = ParsingModule.autocomplete_species_search(ar_elem, search_term) + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + with tab3: + ba_elem = data_dict["all_bacteria_elements"] + search_term = st.text_input("Search for a bacteria species here", "") + ret = ParsingModule.autocomplete_species_search(ba_elem, search_term) + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + + with tab4: + vi_elem = data_dict["all_virus_elements"] + search_term = st.text_input("Search for a viral strain here", "") + ret = ParsingModule.autocomplete_species_search(vi_elem, search_term) + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + 
st.session_state.selected_species.add(ret) + + with tab5: + un_elem = data_dict["all_unclassified_elements"] + search_term = st.text_input("Search for an unclassified species here", "") + ret = ParsingModule.autocomplete_species_search(un_elem, search_term) + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + + with tab6: + other_elem = data_dict["all_other_sequences_elements"] + search_term = st.text_input("Search for a species here", "") + ret = ParsingModule.autocomplete_species_search(other_elem, search_term) + if ret != None: + #if ret is a list, add all elements to the set + if isinstance(ret, list): + for i in ret: + st.session_state.selected_species.add(i) + else: + st.session_state.selected_species.add(ret) + + st.write(st.session_state["selected_species"]) + if len(st.session_state["selected_species"]) > number: + st.error(f"""Number of selected species is {len(st.session_state['selected_species'])}, but this should be {number} according to the input above. 
+ Select the species you cant to remove from the list below""") + # checkbox to remove species + remove_species = set() + for i, element in enumerate(st.session_state["selected_species"]): + if st.checkbox(f"{element}", key=i): + remove_species.add(element) + st.session_state["selected_species"] = st.session_state["selected_species"] - remove_species + st.write(f"Selected species after removal: {st.session_state['selected_species']}") + df = ParsingModule.fill_in_from_list(template_df, selection, list(st.session_state["selected_species"]), multiple_in_one) + update_session_state(df) + + +if selection == "characteristics[organism part]": + st.subheader("Select the part of the organism that is present in your sample") + all_orgpart_elements = data_dict["all_organism_part_elements"] + orgpart_nodes = data_dict["orgpart_nodes"] + df = ParsingModule.multiple_ontology_tree( + selection, all_orgpart_elements, orgpart_nodes, template_df + ) + update_session_state(df) + +if selection == "comment[reduction reagent]": + st.subheader("Input the reduction reagent that was used in your experiment") + all_reduction_elements = data_dict["all_reduction_reagent_elements"] + reduction_nodes = data_dict["reduction_nodes"] + df = ParsingModule.multiple_ontology_tree( + selection, all_reduction_elements, reduction_nodes, template_df + ) + update_session_state(df) + + +if selection == "characteristics[sex]": + col1, col2, col3 = st.columns(3) + with col1: + sel = st.selectbox("Are there multiple sexes in your data?", ("", "No", "Yes", "Not available")) + if sel == "No": + sel2 = st.selectbox("Select the sex of your sample", ("", "F", "M", "unknown")) + if sel2 in ["F", "M", "unknown"]: + template_df[selection] = sel2 + st.session_state["template_df"] = template_df + st.experimental_rerun() + if sel == "Yes": + df = ParsingModule.fill_in_from_list(template_df, "characteristics[sex]", ["F", "M", "unknown"]) + update_session_state(df) + if sel == "Not available": + template_df[selection] = 
"Not Available" + st.session_state["template_df"] = template_df + st.experimental_rerun() + +if selection == "comment[technical replicate]": + col1, col2, col3 = st.columns(3) + with col1: + sel = st.selectbox("Do you have technical replicates?", ("", "Yes", "No")) + if sel == "No": + template_df[selection] = 1 + st.session_state["template_df"] = template_df + st.experimental_rerun() + if sel == "Yes": + with col2: + number = st.number_input("How many technical replicates are in your data?", min_value=0, step=1) + with col3: + s = st.checkbox("Ready for input?") + if sel== "Yes" and s: + tech_rep = [*range(1, number + 1, 1)] + df = ParsingModule.fill_in_from_list(template_df, selection, tech_rep) + update_session_state(df) + + + +if selection == "characteristics[biological replicate]": + col1, col2, col3 = st.columns(3) + with col1: + sel = st.selectbox("Do you have biological replicates?", ("", "Yes", "No")) + if sel == "No": + template_df[selection] = 1 + st.session_state["template_df"] = template_df + st.experimental_rerun() + if sel == "Yes": + with col2: + number = st.number_input("How many biological replicates are there?", min_value=0, step=1) + with col3: + s = st.checkbox("Ready for input?") + if sel== "Yes" and s: + biol_rep = [*range(1, int(number) + 1, 1)] + st.write(biol_rep) + template_df = ParsingModule.fill_in_from_list(template_df, selection, biol_rep) + st.session_state["template_df"] = template_df + #st.experimental_rerun() + +if selection == "comment[fragment mass tolerance]": + with st.form("Fragment mass tolerance"): + col1, col2 = st.columns(2) + with col1: + multiple = st.radio( + "Are there multiple fragment mass tolerances in your data?", + ("Yes", "No"), + ) + with col2: + unit = st.radio( + "Is the fragment mass tolerance in ppm or Da?", ("ppm", "Da") + ) + s = st.form_submit_button("Input") + + if s and multiple == "Yes" and unit: + st.write( + "Input the fragment mass tolerance directly in the SDRF file (without unit)" + ) + df = 
ParsingModule.fill_in_from_list(template_df, selection) + df[selection] = df[selection].astype(str) + " " + str(unit) + update_session_state(df) + st.experimental_rerun() + elif s and multiple == "No" and unit: + fragment_mass = st.text_input("Input the fragment mass tolerance (without unit)") + if fragment_mass: + st.write(fragment_mass) + if isinstance(fragment_mass, str): + template_df[selection] = fragment_mass + " " + str(unit) + st.session_state["template_df"] = template_df + st.experimental_rerun() + +if selection == "comment[precursor mass tolerance]": + with st.form("Precursor mass tolerance"): + col1, col2 = st.columns(2) + with col1: + multiple = st.radio( + "Are there multiple precursor mass tolerances in your data?", + ("Yes", "No"), + ) + with col2: + unit = st.radio( + "Is the precursor mass tolerance in ppm or Da?", ("ppm", "Da") + ) + s = st.form_submit_button("Input") + + if s and multiple == "Yes" and unit: + st.write( + "Input the precursor mass tolerance directly in the SDRF file (without unit)" + ) + df = ParsingModule.fill_in_from_list(template_df, selection) + df[selection] = df[selection].astype(str) + " " + str(unit) + update_session_state(df) + st.experimental_rerun() + elif s and multiple == "No" and unit: + precursor_mass = st.text_input( + "Input the precursor mass tolerance (without unit)" + ) + st.write(precursor_mass) + if precursor_mass: + if isinstance(precursor_mass, str): + template_df[selection] = precursor_mass + " " + str(unit) + st.session_state["template_df"] = template_df + st.experimental_rerun() + + +if selection == "characterics[synthetic peptide]": + st.subheader( + "If the sample is a synthetic peptide library, indicate this by selecting *synthetic* or *not synthetic*" + ) + df = ParsingModule.fill_in_from_list( + template_df, selection, ["synthetic", "not synthetic"] + ) + update_session_state(df) + +if selection == "comment[depletion]": + depl = st.selectbox("Is the sample depleted?", ("","Yes", "No")) + if depl == 
"Yes": + st.write("Indicate depleted or bound fraction directly in the SDRF file") + df = ParsingModule.fill_in_from_list( + template_df, selection, ["depleted fraction", "bound fraction"] + ) + update_session_state(df) + if depl == "No": + template_df["comment[depletion]"] = "not depletion" + st.session_state["template_df"] = template_df + st.experimental_rerun() + +if selection == "comment[modification parameters]": + unimod = data_dict["unimod_dict"] + inputs = sorted(list(unimod.keys())) + inputs.append("Other") + inputs.remove("#NAAM?") + mt = ["Fixed", "Variable", "Annotated"] + pp = ["Anwywhere", "Protein N-term", "Protein C-term", "Any N-term", "Any C-term"] + ta = ["X","G","A","L","M","F","W","K","Q","E","S","P","V","I","C","Y","H","R","N","D","T"] + mods_sel = st.multiselect("Select the modifications present in your data", inputs) + sdrf_mods = [] + st.session_state["sdrf_mods"] = sdrf_mods + + for i in mods_sel: + st.write(f"**{i}**") + col1, col2, col3, col4 = st.columns(4) + + if i == "Other": + with col1: + name = st.text_input( + "Input a logical name for your custom modification" + ) + with col2: + form = st.text_input("Input the chemical formula of the modification") + with col3: + mass = st.text_input("Input the mass of the modification") + with col4: + final_str = f"NT={name};CF={form};MM={mass}" + st.write( + f""" **Final SDRF notation of modification:** + {final_str}""" + ) + done = st.button( + "Okay", + key=f"done_{i}", + help="Click to add the modification to the SDRF file", + on_click=st.session_state["sdrf_mods"].append(final_str), + ) + + else: + with col1: + mt_sel = st.selectbox("Select the modification type", mt, key=f"mt_{i}") + with col2: + pp_sel = st.selectbox( + "Select the position of the modification", pp, key=f"pp_{i}" + ) + with col3: + ta_sel = st.selectbox("Select the target amino acid", ta, key=f"ta_{i}") + with col4: + final_str = f"{unimod[i]};MT={mt_sel};PP={pp_sel};TA={ta_sel}" + st.write( + f"""**Final SDRF notation of 
modification:** + {final_str}""" + ) + done = st.button( + "Okay", + key=f"done_{i}", + help="Click to add the modification to the SDRF file", + on_click=st.session_state["sdrf_mods"].append(final_str), + ) + + submit = st.checkbox( + "Submit modifications", + help="Click to add the modifications to the SDRF file. If everything looks fine, click again", + ) + + if submit: + # for every element in the list sdrf_mods + # add it to the template_df as a value in a new column with name selection_1, selection_2, selection_3, etc + # then update the session state + for i, mod in enumerate(sdrf_mods): + st.write(i, mod) + template_df[f"{selection}_{i}"] = mod + template_df.drop(columns=[selection], inplace=True) + st.session_state["template_df"] = template_df + st.write(template_df) + +if selection == "undo column": + st.write("""Here you can select a column that you want to reannotate. + Upon clicking the column the current values will be removed and you can reannotate the column. + """) + col1, col2 = st.columns(2) + with col1: + sel = st.multiselect("Select the column(s) you want to reannotate", template_df.columns) + with col2: + if st.button("Reannotate"): + template_df[sel] = np.nan + + side_bar_columns = update_sidebar(template_df) + st.session_state["template_df"] = template_df + st.experimental_rerun() + + diff --git a/pages/5_5. Experiment_types.py b/pages/5_5. Experiment_types.py new file mode 100644 index 0000000..9aac51b --- /dev/null +++ b/pages/5_5. 
Experiment_types.py @@ -0,0 +1,92 @@ +import streamlit as st +import ParsingModule +import pandas as pd +import numpy as np +import re + +import warnings +warnings.filterwarnings("ignore") + + +# Define the default button color (you can adjust this as desired) +default_color = "#ffa478" +# Define the button CSS styles +button_styles = f""" + background-color: white; + color: {default_color}; + border-radius: 20px; + padding: 10px 20px; + border: none; + text-align: center; + text-decoration: none; + display: inline-block; + font-size: 16px; + margin: 4px 2px; + cursor: pointer; +""" +# Define the button CSS styles when it's clicked +clicked_styles = f""" + background-color: #ffa478; + color: white; +""" +st.set_page_config( + page_title="SDRF creation tool", + layout="wide", + page_icon="๐Ÿงช", + menu_items={ + "Get help": "https://github.com/TineClaeys/SDRF_GUI", + "Report a bug": "https://github.com/TineClaeys/SDRF_GUI", + }, +) + + +def update_session_state(df): + st.session_state["template_df"] = df + +st.title("""5. Experiment types""") +st.subheader(":construction: *Under development* :construction:") + +url = "https://github.com/TineClaeys/SDRF_GUI/issues" +button = f'Join the community effort' +with st.sidebar: + st.write(button, unsafe_allow_html=True) + +# Get filled in template_df from other page +# if template_df is not in the session state, don't run all the code below +if "template_df" not in st.session_state: + st.error("Please fill in the template file in the Home page first", icon="๐Ÿšจ") + st.stop() +else: + template_df = st.session_state["template_df"] + with st.container(): + st.write("**This is your current SDRF file.**") + st.dataframe(template_df) +data_dict = st.session_state["data_dict"] +unimod = st.session_state["unimod"] + +st.write("""Some experiment types have an atypical SDRF structure e.g. metaproteomics, immunopeptidomics, single cell, etc. Here you can find the community-suggested SDRF columns for such experiments. 
+If you want to add to this effort, please contact us via the button in the sidebar. +""") + +immunopeptidomics = st.button('Immunopeptidomics') +metaproteomics = st.button('Metaproteomics') +single_cell = st.button('Single cell proteomics') + +meta_proteomics_cols = ["characteristics[environmental material]", "characteristics[organism]", "characteristics[diet]", "characteristics[biome]", "characteristics[environmental condition]"] +for button, suggested_cols in zip([metaproteomics], [meta_proteomics_cols]): + if button: + col1, col2 = st.columns(2) + #check which suggested cols are already in the template and which ones are not + detected_cols = [col for col in suggested_cols if col in template_df.columns] + cols_to_add = [col for col in suggested_cols if col not in template_df.columns] + for i in detected_cols: + with col1: + st.success(f"The suggested column **{i}** is already in your SDRF file.", icon="โœ…") + for i in cols_to_add: + with col1: + st.error(f"The suggested column **{i}** is not in your SDRF file. 
Do you want to add it?", icon="โŒ") + with col2: + st.write("Suggested columns:") + for c in cols_to_add: + st.checkbox(f"Add **{c}** to SDRF file", key=c) + diff --git a/pages/columns_onto.txt b/pages/columns_onto.txt new file mode 100644 index 0000000..e0950da --- /dev/null +++ b/pages/columns_onto.txt @@ -0,0 +1,51 @@ + +""" +{'assay name', (run 1, run 2, run 3) + + 'characteristics[age]', ==> select from range Y M W D / or range + 'characteristics[ancestry category]', ==> ontology HANCESTRO:0004 + 'characteristics[biological replicate]', ==> select from range + 'characteristics[cell line]',==> ontology CL:0000000 , further drop down + 'characteristics[cell type]',==> ontology EFO:0000324 + 'characteristics[developmental stage]', ==> http://www.ebi.ac.uk/efo/EFO_0000399 + 'characteristics[disease]',==> ontology, EFO:0000408 + 'characteristics[individual]',==> select from range + 'characteristics[organism part]',==> ontology EFO:0000635 + 'characteristics[organism]', ==> ontology, OBI:0100026, from EFO + 'characteristics[sex]', ==> M/F/unknwown + + 'comment[cleavage agent details]', ==> MS:1001045 + 'comment[data file]', ==> filename + 'comment[fraction identifier]', ==> select from range (1 if not fractionated) + if fractionated: comment[ fractionation method]PRIDE:0000550 + 'comment[instrument]',==>ontology MS:1000031 + 'comment[label]', ==> label free, TMT channelsPRIDE:0000514 + 'comment[technical replicate]',==> select from range + 'source name', (sample 1, sample 2) + 'technology type', [proteomic profiling by mass spectrometry, ] + + ### to add to standard sdrf + protein modifications (unimod) + digestion enzyme + 'comment[fragment mass tolerance]', ppm or Da + 'comment[precursor mass tolerance]' + comment[dissociation method] MS:1000044 + + if enriched: + characteristics[enrichment process] EFO:0009090 +if characteristics[compound]: + characteristics[concentration of] ask for metric + + """ + + + + Cell type + ==> select all cell types available in your 
file + onto tree view: + https://github.com/Schluca/streamlit_tree_select ==> searchable? + ==> Are all samples the same cell type? Aka only one selected + Yes: fill in automatically + No: Drop down menu row wise + https://github.com/PablocFonseca/streamlit-aggrid/issues/168 + gb.configure_column("my_column", editable=True, cellEditor='agSelectCellEditor', cellEditorParams={'values': ['yes', 'no']}) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c1a31eb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +pronto== 2.5.3 +streamlit==1.19.0 +streamlit-aggrid==0.3.4.post3 +streamlit-tree-select==0.0.5 +jsonschema==4.17.0 +zipp==3.10.0 +openpyxl== 3.1.1 \ No newline at end of file diff --git a/templates/PXD000548.sdrf.tsv b/templates/PXD000548.sdrf.tsv new file mode 100644 index 0000000..ec702ae --- /dev/null +++ b/templates/PXD000548.sdrf.tsv @@ -0,0 +1,161 @@ +source name characteristics[organism] characteristics[organism part] characteristics[age] characteristics[developmental stage] characteristics[sex] characteristics[ancestry category] characteristics[disease] characteristics[cell type] characteristics[individual] characteristics[biological replicate] material type assay name technology type comment[fractionation method] comment[technical replicate] comment[fraction identifier] comment[file uri] comment[ms2 mass analyzer] comment[instrument] comment[separation] comment[label] comment[cleavage agent details] comment[modification parameters] comment[modification parameters] comment[precursor mass tolerance] comment[fragment mass tolerance] comment[data file] +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 1 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 1 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice01_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ 
Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice01_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 2 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 1 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice01_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice01_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 3 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 1 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice01_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice01_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 4 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 1 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice01_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 
AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice01_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 5 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 1 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice01_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice01_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 6 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 1 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice01_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice01_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 7 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 1 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice01_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 
NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice01_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 8 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 1 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice01_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice01_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 9 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 2 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice02_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice02_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 10 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 2 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice02_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available 
dms_17Jul13_ATL_Proteome_Slice02_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 11 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 2 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice02_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice02_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 12 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 2 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice02_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice02_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 13 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 2 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice02_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice02_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M 
not available heart infarction not applicable 6 1 tissue run 14 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 2 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice02_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice02_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 15 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 2 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice02_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice02_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 16 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 2 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice02_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice02_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 17 proteomic profiling by 
mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 3 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice03_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice03_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 18 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 3 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice03_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice03_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 19 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 3 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice03_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice03_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 20 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 3 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice03_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice03_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 21 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 3 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice03_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice03_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 22 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 3 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice03_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice03_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 23 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 3 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice03_02.RAW 
AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice03_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 24 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 3 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice03_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice03_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 25 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 4 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice04_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice04_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 26 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 4 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice04_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 
NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice04_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 27 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 4 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice04_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice04_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 28 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 4 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice04_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice04_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 29 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 4 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice04_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample 
AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice04_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 30 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 4 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice04_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice04_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 31 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 4 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice04_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice04_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 32 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 4 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice04_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 
NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice04_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 33 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 5 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice05_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice05_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 34 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 5 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice05_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice05_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 35 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 5 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice05_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available 
dms_17Jul13_ATL_Proteome_Slice05_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 36 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 5 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice05_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice05_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 37 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 5 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice05_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice05_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 38 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 5 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice05_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice05_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available 
M not available heart infarction not applicable 7 1 tissue run 39 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 5 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice05_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice05_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 40 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 5 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice05_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice05_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 41 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 6 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice06_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice06_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 42 proteomic 
profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 6 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice06_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice06_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 43 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 6 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice06_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice06_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 44 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 6 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice06_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice06_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 45 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 6 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice06_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice06_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 46 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 6 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice06_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice06_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 47 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 6 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice06_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice06_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 48 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 6 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice06_02.RAW 
AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice06_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 49 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 7 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice07_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice07_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 50 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 7 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice07_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice07_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 51 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 7 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice07_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance 
liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice07_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 52 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 7 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice07_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice07_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 53 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 7 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice07_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice07_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 54 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 7 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice07_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin 
NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice07_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 55 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 7 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice07_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice07_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 56 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 7 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice07_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice07_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 57 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 8 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice08_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 
not available not available dms_17Jul13_ATL_Proteome_Slice08_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 58 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 8 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice08_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice08_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 59 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 8 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice08_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice08_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 60 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 8 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice08_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice08_01.RAW +Sample 5 Homo sapiens 
anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 61 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 8 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice08_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice08_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 62 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 8 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice08_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice08_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 63 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 8 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice08_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice08_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 
64 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 8 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice08_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice08_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 65 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 9 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice09_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice09_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 66 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 9 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice09_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice09_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 67 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 9 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice09_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice09_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 68 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 9 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice09_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice09_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 69 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 9 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice09_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice09_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 70 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 9 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice09_02.RAW 
AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice09_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 71 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 9 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice09_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice09_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 72 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 9 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice09_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice09_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 73 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 10 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice10_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid 
chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice10_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 74 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 10 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice10_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice10_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 75 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 10 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice10_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice10_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 76 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 10 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice10_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample 
AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice10_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 77 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 10 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice10_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice10_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 78 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 10 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice10_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice10_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 79 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 10 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice10_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 
NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice10_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 80 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 10 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice10_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice10_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 81 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 11 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice11_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice11_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 82 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 11 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice11_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available 
dms_17Jul13_ATL_Proteome_Slice11_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 83 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 11 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice11_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice11_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 84 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 11 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice11_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice11_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 85 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 11 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice11_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice11_02.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available 
M not available heart infarction not applicable 6 1 tissue run 86 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 11 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice11_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice11_02.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 87 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 11 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice11_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice11_02.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 88 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 11 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice11_02.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice11_02.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 89 proteomic profiling 
by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 12 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice12_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice12_01.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 90 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 12 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice12_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice12_01.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 91 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 12 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice12_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice12_01.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 92 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 12 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice12_01.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice12_01.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 93 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 12 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice12_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice12_02-.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 94 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 12 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice12_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice12_02-.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 95 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 12 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice12_02-.RAW 
AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice12_02-.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 96 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 12 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice12_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice12_02-.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 97 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 13 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice13_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice13_01-.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 98 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 13 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice13_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 
NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice13_01-.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 99 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 13 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice13_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice13_01-.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 100 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 13 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice13_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice13_01-.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 101 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 13 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice13_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample 
AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice13_02-.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 102 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 13 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice13_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice13_02-.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 103 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 13 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice13_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice13_02-.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 104 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 13 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice13_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 
NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice13_02-.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 105 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 14 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice14_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice14_01-.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 106 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 14 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice14_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice14_01-.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 107 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 14 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice14_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not 
available dms_17Jul13_ATL_Proteome_Slice14_01-.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 108 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 14 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice14_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice14_01-.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 109 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 14 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice14_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice14_02-.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 110 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 14 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice14_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice14_02-.RAW +Sample 7 Homo sapiens anterior temporal 
lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 111 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 14 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice14_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice14_02-.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 112 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 14 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice14_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice14_02-.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 113 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 15 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice15_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice15_01-.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not 
applicable 2 1 tissue run 114 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 15 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice15_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice15_01-.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 115 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 15 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice15_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice15_01-.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 116 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 15 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice15_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice15_01-.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 117 proteomic profiling by mass spectrometry NT=SDS 
PAGE;AC=PRIDE:0000568 1 15 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice15_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice15_02-.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 118 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 15 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice15_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice15_02-.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 119 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 15 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice15_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice15_02-.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 120 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 15 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice15_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice15_02-.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 121 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 16 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice16_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice16_01-.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 122 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 16 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice16_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice16_01-.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 123 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 16 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice16_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice16_01-.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 124 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 16 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice16_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice16_01-.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 125 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 16 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice16_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice16_02-.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 126 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 16 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice16_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice16_02-.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 127 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 16 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice16_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice16_02-.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 128 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 16 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice16_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice16_02-.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 129 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 17 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice17_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice17_01-.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 130 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 17 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice17_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice17_01-.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 131 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 17 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice17_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice17_01-.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 132 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 17 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice17_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice17_01-.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 133 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 17 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice17_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice17_02-.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 134 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 17 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice17_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice17_02-.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 135 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 17 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice17_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice17_02-.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 136 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 17 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice17_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice17_02-.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 137 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 18 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice18_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice18_01-.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 138 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 18 
https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice18_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice18_01-.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 139 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 18 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice18_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice18_01-.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 140 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 18 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice18_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice18_01-.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 141 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 18 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice18_02-.RAW 
AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice18_02-.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 142 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 18 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice18_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice18_02-.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 143 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 18 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice18_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice18_02-.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 144 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 18 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice18_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid 
chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice18_02-.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 145 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 19 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice19_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice19_01-.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 146 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 19 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice19_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice19_01-.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 147 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 19 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice19_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample 
AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice19_01-.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 148 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 19 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice19_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice19_01-.RAW +Sample 5 Homo sapiens anterior temporal lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 149 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 19 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice19_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice19_02-.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 150 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 19 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice19_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 
NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice19_02-.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 151 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 19 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice19_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice19_02-.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 152 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 19 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice19_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice19_02-.RAW +Sample 1 Homo sapiens anterior temporal lobe 41 not available F not available cardiopulmonary insufficiency not applicable 1 1 tissue run 153 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 20 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice20_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available 
dms_17Jul13_ATL_Proteome_Slice20_01-.RAW +Sample 2 Homo sapiens anterior temporal lobe 91 not available F not available cardiopulmonary insufficiency not applicable 2 1 tissue run 154 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 20 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice20_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice20_01-.RAW +Sample 3 Homo sapiens anterior temporal lobe 69 not available F not available lung embolism not applicable 3 1 tissue run 155 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 20 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice20_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice20_01-.RAW +Sample 4 Homo sapiens anterior temporal lobe 57 not available M not available heart infarction not applicable 4 1 tissue run 156 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 20 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice20_01-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice20_01-.RAW +Sample 5 Homo sapiens anterior temporal 
lobe 53 not available M not available heart infarction not applicable 5 1 tissue run 157 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 20 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice20_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice20_02-.RAW +Sample 6 Homo sapiens anterior temporal lobe 63 not available M not available heart infarction not applicable 6 1 tissue run 158 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 20 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice20_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice20_02-.RAW +Sample 7 Homo sapiens anterior temporal lobe 66 not available M not available heart infarction not applicable 7 1 tissue run 159 proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 20 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice20_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice20_02-.RAW +Sample 8 Homo sapiens anterior temporal lobe 79 not available M not available heart infarction not applicable 8 1 tissue run 160 
proteomic profiling by mass spectrometry NT=SDS PAGE;AC=PRIDE:0000568 1 20 https://ftp.ebi.ac.uk/pride-archive/2014/06/PXD000548/dms_17Jul13_ATL_Proteome_Slice20_02-.RAW AC=MS:1000484; NT=Orbitrap NT=LTQ Orbitrap XL;AC=MS:1000556 NT=High-performance liquid chromatography;AC=PRIDE:0000565 AC=MS:1002038;NT=label free sample AC=MS:1001251;NT=Trypsin NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 NT=Oxidation;MT=variable;TA=M;AC=UNIMOD:35 not available not available dms_17Jul13_ATL_Proteome_Slice20_02-.RAW diff --git a/templates/sdrf-cell-line.tsv b/templates/sdrf-cell-line.tsv new file mode 100644 index 0000000..37219fe --- /dev/null +++ b/templates/sdrf-cell-line.tsv @@ -0,0 +1 @@ +source name characteristics[organism] characteristics[organism part] characteristics[cell type] characteristics[disease] characteristics[cell line] characteristics[biological replicate] technology type assay name comment[technical replicate] comment[data file] comment[fraction identifier] comment[label] comment[cleavage agent details] comment[instrument] diff --git a/templates/sdrf-default.tsv b/templates/sdrf-default.tsv new file mode 100644 index 0000000..9241fed --- /dev/null +++ b/templates/sdrf-default.tsv @@ -0,0 +1,2 @@ +source name characteristics[organism] characteristics[organism part] characteristics[disease] characteristics[biological replicate] technology type assay name comment[technical replicate] comment[data file] comment[fraction identifier] comment[label] comment[cleavage agent details] comment[instrument] + diff --git a/templates/sdrf-human.tsv b/templates/sdrf-human.tsv new file mode 100644 index 0000000..cf299b7 --- /dev/null +++ b/templates/sdrf-human.tsv @@ -0,0 +1 @@ +source name characteristics[organism] characteristics[organism part] characteristics[cell type] characteristics[ancestry category] characteristics[age] characteristics[sex] characteristics[disease] characteristics[individual] characteristics[biological replicate] technology type assay name 
comment[technical replicate] comment[data file] comment[fraction identifier] comment[label] comment[instrument] comment[cleavage agent details] diff --git a/templates/sdrf-nonvertebrates.tsv b/templates/sdrf-nonvertebrates.tsv new file mode 100644 index 0000000..d38fb31 --- /dev/null +++ b/templates/sdrf-nonvertebrates.tsv @@ -0,0 +1 @@ +source name characteristics[organism] characteristics[organism part] characteristics[disease] characteristics[cell type] characteristics[biological replicate] technology type assay name comment[technical replicate] comment[data file] comment[fraction identifier] comment[label] comment[instrument] comment[cleavage agent details] diff --git a/templates/sdrf-plants.tsv b/templates/sdrf-plants.tsv new file mode 100644 index 0000000..ae777b7 --- /dev/null +++ b/templates/sdrf-plants.tsv @@ -0,0 +1 @@ +source name characteristics[organism] characteristics[organism part] characteristics[cell type] characteristics[disease] characteristics[biological replicate] technology type assay name comment[technical replicate] comment[data file] comment[fraction identifier] comment[label] comment[instrument] comment[cleavage agent details] diff --git a/templates/sdrf-vertebrates.tsv b/templates/sdrf-vertebrates.tsv new file mode 100644 index 0000000..8b5c7ff --- /dev/null +++ b/templates/sdrf-vertebrates.tsv @@ -0,0 +1 @@ +source name characteristics[organism] characteristics[organism part] characteristics[cell type] characteristics[developmental stage] characteristics[disease] characteristics[biological replicate] technology type assay name comment[technical replicate] comment[data file] comment[fraction identifier] comment[label] comment[cleavage agent details] comment[instrument]