From 1221f4000ee4de25024122f9cb0fcdb3456c3eaf Mon Sep 17 00:00:00 2001 From: niklases <60261497+niklases@users.noreply.github.com> Date: Mon, 8 May 2023 18:01:41 +0200 Subject: [PATCH] Update Workflow_PyPEF.ipynb --- workflow/Workflow_PyPEF.ipynb | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/workflow/Workflow_PyPEF.ipynb b/workflow/Workflow_PyPEF.ipynb index 328cd93..9b1bd43 100644 --- a/workflow/Workflow_PyPEF.ipynb +++ b/workflow/Workflow_PyPEF.ipynb @@ -362,7 +362,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For hybrid modeling, here we use a very small dataset fraction of the ANEH dataset (37 variants with associated (negative) fitness values). Next, you can run model training and validation on the test sets and save the best models (in this notebook only using a single CPU):" + "For hybrid modeling, we use only a very small fraction of the ANEH dataset here (37 variants with associated (negative) fitness values). Next, you can run model training and validation on the test sets and save the best models (in this notebook, using only a single CPU). DCA-based encoding requires the plmc parameter file, which has to be downloaded first (uncomment the cell below to do so)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#import urllib.request\n", + "#from hashlib import sha256\n", + "#url = 'https://github.com/niklases/PyPEF/raw/main/workflow/test_dataset_aneh/ANEH_72.6.params'\n", + "#urllib.request.urlretrieve(url, 'ANEH_72.6.params') # File size: 207 MB\n", + "#with open('ANEH_72.6.params', 'rb') as f:\n", + "# sha256_hash = sha256(f.read()).hexdigest()\n", + "# if not sha256_hash == 'd84bd2185f5882c3ef01ab99264e30111333b8adb05136b953f407b1b6dfbaf8':\n", + "# raise SystemError(\"Hash of downloaded DCA parameter file not correct, terminating further running.\")\n", + "#print('Successfully downloaded ANEH_72.6.params!')" ] }, { @@ -824,7 +841,6 @@ " sha256_hash = sha256(f.read()).hexdigest()\n", " if not sha256_hash == 'be4623f35a5ba05d33a29ae6e69dc3c2e994e3c9092cd5880a8d0bbc12f187b1':\n", " raise SystemError(\"Hash of downloaded CSV file not correct, terminating further running.\")\n", - "# Wild-type encoding,\n", "url = 'https://github.com/niklases/PyPEF/raw/main/workflow/test_dataset_avgfp/uref100_avgfp_jhmmer_119_plmc_42.6.params'\n", "urllib.request.urlretrieve(url, 'uref100_avgfp_jhmmer_119_plmc_42.6.params') # File size: 71.2 MB\n", "with open('uref100_avgfp_jhmmer_119_plmc_42.6.params', 'rb') as f:\n",