diff --git a/src/analysis/opencravat_latest_benchmarking-Consequence_80_20.ipynb b/src/analysis/opencravat_latest_benchmarking-Consequence_80_20.ipynb
index 9ba3f2e..97f6d71 100644
--- a/src/analysis/opencravat_latest_benchmarking-Consequence_80_20.ipynb
+++ b/src/analysis/opencravat_latest_benchmarking-Consequence_80_20.ipynb
@@ -25,7 +25,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"id": "2b03269e",
"metadata": {},
"outputs": [
@@ -33,249 +33,842 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "2024-02-28 16:40:55.203574: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA\n",
+ "2024-06-27 17:31:47.599689: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "pd.set_option('display.max_rows', None)\n",
+ "import yaml\n",
+ "import warnings\n",
+ "warnings.simplefilter(\"ignore\")\n",
+ "#from joblib import load, dump\n",
+ "import argparse\n",
+ "#import shap\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import functools\n",
+ "print = functools.partial(print, flush=True)\n",
+ "from sklearn.preprocessing import label_binarize, MinMaxScaler\n",
+ "from tensorflow import keras\n",
+ "from sklearn.metrics import (\n",
+ " roc_curve,precision_score,\n",
+ " precision_recall_curve,roc_auc_score,\n",
+ " f1_score,accuracy_score, confusion_matrix, ConfusionMatrixDisplay,\n",
+ " confusion_matrix,\n",
+ " average_precision_score,\n",
+ " recall_score\n",
+ ")\n",
+ "import pickle\n",
+ "from sklearn.utils import class_weight\n",
+ "import shap\n",
+ "# from keras_sequential_ascii import keras2ascii\n",
+ "# from nnv import NNV\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "880823ee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "warnings.simplefilter(\"ignore\", category=DeprecationWarning)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "22e98d97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open(\n",
+ " \"../../configs/col_config.yaml\"\n",
+ " ) as fh:\n",
+ " config_dict = yaml.safe_load(fh)\n",
+ "\n",
+ "with open(\n",
+ " \"../../configs/var_class.yaml\"\n",
+ " ) as fh1:\n",
+ " var_dict = yaml.safe_load(fh1)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'3 prime UTR',\n",
+ " '5 prime UTR',\n",
+ " 'complex substitution',\n",
+ " 'exon loss variant',\n",
+ " 'frameshift elongation',\n",
+ " 'frameshift truncation',\n",
+ " 'inframe deletion',\n",
+ " 'inframe insertion',\n",
+ " 'intergenic',\n",
+ " 'intron',\n",
+ " 'missense',\n",
+ " 'other',\n",
+ " 'other RNA',\n",
+ " 'splice site',\n",
+ " 'start lost',\n",
+ " 'start retained',\n",
+ " 'stop gained',\n",
+ " 'stop lost',\n",
+ " 'stop retained',\n",
+ " 'synonymous'}"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "set(var_dict.values())\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "3a4ff3de",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#amis = pd.read_csv(\"/Users/tarunmamidi/Downloads/AlphaMissense_hg38.tsv\", low_memory=False, skiprows=3, sep='\\t')\n",
+ "#amis.head()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "4bcc801e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-06-27 17:31:55.583994: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model: \"sequential\"\n",
+ "_________________________________________________________________\n",
+ " Layer (type) Output Shape Param # \n",
+ "=================================================================\n",
+ " dense (Dense) (None, 239) 57360 \n",
+ " \n",
+ " dense_l0 (Dense) (None, 161) 38640 \n",
+ " \n",
+ " dropout (Dropout) (None, 161) 0 \n",
+ " \n",
+ " dense_last (Dense) (None, 1) 162 \n",
+ " \n",
+ "=================================================================\n",
+ "Total params: 96,162\n",
+ "Trainable params: 96,162\n",
+ "Non-trainable params: 0\n",
+ "_________________________________________________________________\n"
+ ]
+ }
+ ],
+ "source": [
+ "clf = keras.models.load_model('../../model/Neural_network/')\n",
+ "clf.load_weights(\"../../model/weights.h5\")\n",
+ "clf.summary()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#keras2ascii(clf)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# layersList = [\n",
+ "# {\"title\":\"Input\\n(239 n)\\n(elu)\", \"units\": 239, \"color\": \"green\", \"edges_color\":\"darkBlue\", \"edges_width\":2},\n",
+ "# {\"title\":\"Dense\\n(161 n)\\n(elu)\", \"units\": 161, \"edges_color\":\"darkBlue\", \"edges_width\":2,\"color\": \"orange\"},\n",
+ "# #{\"title\":\"Dropout\", \"units\": 161, \"edges_color\":\"red\", \"edges_width\":2},\n",
+ "# {\"title\":\"output\\n(1 n)\\n(sigmoid)\", \"units\": 1,\"color\": \"red\"},\n",
+ "# ]\n",
+ "\n",
+ "# NNV(layersList).render(save_to_file=\"DITTO.png\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "276a7133",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(842659, 255)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " transcript | \n",
+ " gene | \n",
+ " consequence | \n",
+ " protein_hgvs | \n",
+ " cdna_hgvs | \n",
+ " chrom | \n",
+ " pos | \n",
+ " ref_base | \n",
+ " alt_base | \n",
+ " clingen.disease | \n",
+ " ... | \n",
+ " mutationtaster.prediction_Automatic Polymorphism | \n",
+ " mutationtaster.prediction_Damaging | \n",
+ " mutationtaster.prediction_Polymorphism | \n",
+ " mutationtaster.model_complex_aae | \n",
+ " mutationtaster.model_simple_aae | \n",
+ " mutationtaster.model_without_aae | \n",
+ " prec.stat_lof-tolerant | \n",
+ " prec.stat_recessive | \n",
+ " sift.confidence_High | \n",
+ " sift.confidence_Low | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ENST00000350721 | \n",
+ " ATR | \n",
+ " synonymous_variant | \n",
+ " p.Asp2494= | \n",
+ " c.7482T>C | \n",
+ " chr3 | \n",
+ " 142458979 | \n",
+ " A | \n",
+ " G | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ENST00000661310 | \n",
+ " ATR | \n",
+ " synonymous_variant | \n",
+ " p.Asp2430= | \n",
+ " c.7290T>C | \n",
+ " chr3 | \n",
+ " 142458979 | \n",
+ " A | \n",
+ " G | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ENST00000310018 | \n",
+ " ATP6V0A4 | \n",
+ " missense_variant | \n",
+ " p.Asp679Tyr | \n",
+ " c.2035G>T | \n",
+ " chr7 | \n",
+ " 138722001 | \n",
+ " C | \n",
+ " A | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ENST00000353492 | \n",
+ " ATP6V0A4 | \n",
+ " missense_variant | \n",
+ " p.Asp679Tyr | \n",
+ " c.2035G>T | \n",
+ " chr7 | \n",
+ " 138722001 | \n",
+ " C | \n",
+ " A | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ENST00000393054 | \n",
+ " ATP6V0A4 | \n",
+ " missense_variant | \n",
+ " p.Asp679Tyr | \n",
+ " c.2035G>T | \n",
+ " chr7 | \n",
+ " 138722001 | \n",
+ " C | \n",
+ " A | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 255 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " transcript gene consequence protein_hgvs cdna_hgvs \n",
+ "0 ENST00000350721 ATR synonymous_variant p.Asp2494= c.7482T>C \\\n",
+ "1 ENST00000661310 ATR synonymous_variant p.Asp2430= c.7290T>C \n",
+ "2 ENST00000310018 ATP6V0A4 missense_variant p.Asp679Tyr c.2035G>T \n",
+ "3 ENST00000353492 ATP6V0A4 missense_variant p.Asp679Tyr c.2035G>T \n",
+ "4 ENST00000393054 ATP6V0A4 missense_variant p.Asp679Tyr c.2035G>T \n",
+ "\n",
+ " chrom pos ref_base alt_base clingen.disease ... \n",
+ "0 chr3 142458979 A G NaN ... \\\n",
+ "1 chr3 142458979 A G NaN ... \n",
+ "2 chr7 138722001 C A NaN ... \n",
+ "3 chr7 138722001 C A NaN ... \n",
+ "4 chr7 138722001 C A NaN ... \n",
+ "\n",
+ " mutationtaster.prediction_Automatic Polymorphism \n",
+ "0 0 \\\n",
+ "1 0 \n",
+ "2 0 \n",
+ "3 0 \n",
+ "4 0 \n",
+ "\n",
+ " mutationtaster.prediction_Damaging mutationtaster.prediction_Polymorphism \n",
+ "0 0 0 \\\n",
+ "1 0 0 \n",
+ "2 0 1 \n",
+ "3 0 1 \n",
+ "4 0 1 \n",
+ "\n",
+ " mutationtaster.model_complex_aae mutationtaster.model_simple_aae \n",
+ "0 0 0 \\\n",
+ "1 0 0 \n",
+ "2 0 1 \n",
+ "3 0 1 \n",
+ "4 0 1 \n",
+ "\n",
+ " mutationtaster.model_without_aae prec.stat_lof-tolerant \n",
+ "0 0 0 \\\n",
+ "1 0 0 \n",
+ "2 0 0 \n",
+ "3 0 0 \n",
+ "4 0 0 \n",
+ "\n",
+ " prec.stat_recessive sift.confidence_High sift.confidence_Low \n",
+ "0 1 1 0 \n",
+ "1 1 0 0 \n",
+ "2 0 1 0 \n",
+ "3 0 1 0 \n",
+ "4 0 1 0 \n",
+ "\n",
+ "[5 rows x 255 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_train = pd.read_csv(f\"../../data/train_class_data_80.csv.gz\")\n",
+ "print(X_train.shape)\n",
+ "X_train.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "401a4e11",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['transcript',\n",
+ " 'gene',\n",
+ " 'consequence',\n",
+ " 'protein_hgvs',\n",
+ " 'cdna_hgvs',\n",
+ " 'chrom',\n",
+ " 'pos',\n",
+ " 'ref_base',\n",
+ " 'alt_base',\n",
+ " 'clingen.disease',\n",
+ " 'clingen.classification',\n",
+ " 'ncbigene.entrez',\n",
+ " 'omim.omim_id',\n",
+ " 'uniprot.acc',\n",
+ " 'dbsnp.rsid',\n",
+ " 'class',\n",
+ " 'aloft.tolerant',\n",
+ " 'aloft.recessive',\n",
+ " 'aloft.dominant',\n",
+ " 'cadd.phred',\n",
+ " 'chasmplus.score',\n",
+ " 'chasmplus.pval',\n",
+ " 'civic.molecular_profile_score',\n",
+ " 'cosmic.variant_count',\n",
+ " 'cosmic_gene.occurrences',\n",
+ " 'cscape.score',\n",
+ " 'cancer_genome_interpreter.resistant',\n",
+ " 'cancer_genome_interpreter.responsive',\n",
+ " 'clinpred.score',\n",
+ " 'dann.score',\n",
+ " 'dann_coding.dann_coding_score',\n",
+ " 'dgi.score',\n",
+ " 'ess_gene.indispensability_score',\n",
+ " 'exac_gene.exac_pli',\n",
+ " 'exac_gene.exac_pnull',\n",
+ " 'exac_gene.exac_del_score',\n",
+ " 'exac_gene.exac_dup_score',\n",
+ " 'exac_gene.exac_cnv_score',\n",
+ " 'fathmm.fathmm_score',\n",
+ " 'fathmm_xf_coding.fathmm_xf_coding_score',\n",
+ " 'funseq2.score',\n",
+ " 'gerp.gerp_rs',\n",
+ " 'ghis.ghis',\n",
+ " 'gwas_catalog.pval',\n",
+ " 'genehancer.score',\n",
+ " 'linsight.value',\n",
+ " 'lrt.lrt_score',\n",
+ " 'lrt.lrt_omega',\n",
+ " 'loftool.loftool_score',\n",
+ " 'mavedb.score',\n",
+ " 'metalr.score',\n",
+ " 'metasvm.score',\n",
+ " 'mutpred1.mutpred_general_score',\n",
+ " 'mutpred_indel.score',\n",
+ " 'mutation_assessor.score',\n",
+ " 'mutationtaster.score',\n",
+ " 'ndex_chd.numhit',\n",
+ " 'ndex.numhit',\n",
+ " 'ndex_signor.numhit',\n",
+ " 'prec.prec',\n",
+ " 'provean.score',\n",
+ " 'pangalodb.sensitivity',\n",
+ " 'pangalodb.specificity',\n",
+ " 'phdsnpg.score',\n",
+ " 'phastcons.phastcons100_vert',\n",
+ " 'phastcons.phastcons30_mamm',\n",
+ " 'phastcons.phastcons17way_primate',\n",
+ " 'phylop.phylop100_vert',\n",
+ " 'phylop.phylop30_mamm',\n",
+ " 'phylop.phylop17_primate',\n",
+ " 'polyphen2.hdiv_rank',\n",
+ " 'polyphen2.hvar_rank',\n",
+ " 'revel.score',\n",
+ " 'rvis.rvis_evs',\n",
+ " 'sift.score',\n",
+ " 'sift.med',\n",
+ " 'sift.seqs',\n",
+ " 'segway.mean_score',\n",
+ " 'siphy.logodds_rank',\n",
+ " 'spliceai.ds_ag',\n",
+ " 'spliceai.ds_al',\n",
+ " 'spliceai.ds_dg',\n",
+ " 'spliceai.ds_dl',\n",
+ " 'spliceai.dp_ag',\n",
+ " 'spliceai.dp_al',\n",
+ " 'spliceai.dp_dg',\n",
+ " 'spliceai.dp_dl',\n",
+ " 'varity_r.varity_r_loo',\n",
+ " 'varity_r.varity_er_loo',\n",
+ " 'vest.score',\n",
+ " 'dbscsnv.ada_score',\n",
+ " 'dbscsnv.rf_score',\n",
+ " 'gnomad.af',\n",
+ " 'gnomad_gene.oe_lof',\n",
+ " 'gnomad_gene.oe_mis',\n",
+ " 'gnomad_gene.oe_syn',\n",
+ " 'gnomad_gene.lof_z',\n",
+ " 'gnomad_gene.mis_z',\n",
+ " 'gnomad_gene.syn_z',\n",
+ " 'gnomad_gene.pLI',\n",
+ " 'gnomad_gene.pRec',\n",
+ " 'gnomad_gene.pNull',\n",
+ " 'gnomad3.af',\n",
+ " 'phi.phi',\n",
+ " 'Adipose_Subcutaneous',\n",
+ " 'Adipose_Visceral_Omentum',\n",
+ " 'Adrenal_Gland',\n",
+ " 'Artery_Aorta',\n",
+ " 'Artery_Coronary',\n",
+ " 'Artery_Tibial',\n",
+ " 'Brain_Amygdala',\n",
+ " 'Brain_Anterior_cingulate_cortex_BA24',\n",
+ " 'Brain_Caudate_basal_ganglia',\n",
+ " 'Brain_Cerebellar_Hemisphere',\n",
+ " 'Brain_Cerebellum',\n",
+ " 'Brain_Cortex',\n",
+ " 'Brain_Frontal_Cortex_BA9',\n",
+ " 'Brain_Hippocampus',\n",
+ " 'Brain_Hypothalamus',\n",
+ " 'Brain_Nucleus_accumbens_basal_ganglia',\n",
+ " 'Brain_Putamen_basal_ganglia',\n",
+ " 'Brain_Spinal_cord_cervical_c-1',\n",
+ " 'Brain_Substantia_nigra',\n",
+ " 'Breast_Mammary_Tissue',\n",
+ " 'Cells_EBV-transformed_lymphocytes',\n",
+ " 'Cells_Transformed_fibroblasts',\n",
+ " 'Colon_Sigmoid',\n",
+ " 'Colon_Transverse',\n",
+ " 'Esophagus_Gastroesophageal_Junction',\n",
+ " 'Esophagus_Mucosa',\n",
+ " 'Esophagus_Muscularis',\n",
+ " 'Heart_Atrial_Appendage',\n",
+ " 'Heart_Left_Ventricle',\n",
+ " 'Liver',\n",
+ " 'Lung',\n",
+ " 'Minor_Salivary_Gland',\n",
+ " 'Muscle_Skeletal',\n",
+ " 'Nerve_Tibial',\n",
+ " 'Ovary',\n",
+ " 'Pancreas',\n",
+ " 'Pituitary',\n",
+ " 'Prostate',\n",
+ " 'Skin_Not_Sun_Exposed_Suprapubic',\n",
+ " 'Skin_Sun_Exposed_Lower_leg',\n",
+ " 'Small_Intestine_Terminal_Ileum',\n",
+ " 'Spleen',\n",
+ " 'Stomach',\n",
+ " 'Testis',\n",
+ " 'Thyroid',\n",
+ " 'Uterus',\n",
+ " 'Vagina',\n",
+ " 'Whole_Blood',\n",
+ " 'activator',\n",
+ " 'adduct',\n",
+ " 'agonist',\n",
+ " 'allosteric modulator',\n",
+ " 'antagonist',\n",
+ " 'antibody',\n",
+ " 'binder',\n",
+ " 'blocker',\n",
+ " 'chaperone',\n",
+ " 'cofactor',\n",
+ " 'inducer',\n",
+ " 'inhibitor',\n",
+ " 'ligand',\n",
+ " 'modulator',\n",
+ " 'negative modulator',\n",
+ " 'positive modulator',\n",
+ " 'potentiator',\n",
+ " 'product of',\n",
+ " 'stimulator',\n",
+ " 'substrate',\n",
+ " 'vaccine',\n",
+ " 'AD',\n",
+ " 'AR',\n",
+ " 'AR ',\n",
+ " 'BG',\n",
+ " 'Digenic',\n",
+ " 'XL',\n",
+ " '2kb_downstream_variant',\n",
+ " '2kb_upstream_variant',\n",
+ " '3_prime_UTR_variant',\n",
+ " '5_prime_UTR_variant',\n",
+ " 'NMD_transcript_variant',\n",
+ " 'NSD_transcript',\n",
+ " 'complex_substitution',\n",
+ " 'exon_loss_variant',\n",
+ " 'frameshift_elongation',\n",
+ " 'frameshift_truncation',\n",
+ " 'inframe_deletion',\n",
+ " 'inframe_insertion',\n",
+ " 'intron_variant',\n",
+ " 'lnc_RNA',\n",
+ " 'miRNA',\n",
+ " 'misc_RNA',\n",
+ " 'missense_variant',\n",
+ " 'polymorphic_pseudogene',\n",
+ " 'processed_transcript',\n",
+ " 'rRNA',\n",
+ " 'ribozyme',\n",
+ " 'scaRNA',\n",
+ " 'snRNA',\n",
+ " 'snoRNA',\n",
+ " 'splice_site_variant',\n",
+ " 'start_lost',\n",
+ " 'start_retained_variant',\n",
+ " 'stop_gained',\n",
+ " 'stop_lost',\n",
+ " 'stop_retained_variant',\n",
+ " 'synonymous_variant',\n",
+ " 'transcript_ablation',\n",
+ " 'LINE',\n",
+ " 'LTR',\n",
+ " 'Low_complexity',\n",
+ " 'SINE',\n",
+ " 'Satellite',\n",
+ " 'Simple_repeat',\n",
+ " 'Enhancer',\n",
+ " 'Promoter',\n",
+ " 'germline',\n",
+ " 'somatic',\n",
+ " 'Oncogene',\n",
+ " 'TSG',\n",
+ " 'fusion',\n",
+ " 'coding_Yes',\n",
+ " 'aloft.pred_Dominant',\n",
+ " 'aloft.pred_Recessive',\n",
+ " 'aloft.pred_Tolerant',\n",
+ " 'aloft.conf_High',\n",
+ " 'aloft.conf_Low',\n",
+ " 'ccre_screen._group_CTCF-only',\n",
+ " 'ccre_screen._group_DNase-H3K4me3',\n",
+ " 'ccre_screen._group_PLS',\n",
+ " 'ccre_screen._group_dELS',\n",
+ " 'ccre_screen._group_pELS',\n",
+ " 'ccre_screen.bound_Yes',\n",
+ " 'ensembl_regulatory_build.region_CTCF_binding_site',\n",
+ " 'ensembl_regulatory_build.region_TF_binding_site',\n",
+ " 'ensembl_regulatory_build.region_enhancer',\n",
+ " 'ensembl_regulatory_build.region_open_chromatin_region',\n",
+ " 'ensembl_regulatory_build.region_promoter',\n",
+ " 'ensembl_regulatory_build.region_promoter_flanking_region',\n",
+ " 'exac_gene.exac_cnv_flag_N',\n",
+ " 'exac_gene.exac_cnv_flag_Y',\n",
+ " 'mutationtaster.prediction_Automatic Disease Causing',\n",
+ " 'mutationtaster.prediction_Automatic Polymorphism',\n",
+ " 'mutationtaster.prediction_Damaging',\n",
+ " 'mutationtaster.prediction_Polymorphism',\n",
+ " 'mutationtaster.model_complex_aae',\n",
+ " 'mutationtaster.model_simple_aae',\n",
+ " 'mutationtaster.model_without_aae',\n",
+ " 'prec.stat_lof-tolerant',\n",
+ " 'prec.stat_recessive',\n",
+ " 'sift.confidence_High',\n",
+ " 'sift.confidence_Low']"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "import pandas as pd\n",
- "pd.set_option('display.max_rows', None)\n",
- "import yaml\n",
- "import warnings\n",
- "warnings.simplefilter(\"ignore\")\n",
- "#from joblib import load, dump\n",
- "import argparse\n",
- "#import shap\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "import functools\n",
- "print = functools.partial(print, flush=True)\n",
- "from sklearn.preprocessing import label_binarize, MinMaxScaler\n",
- "from tensorflow import keras\n",
- "from sklearn.metrics import (\n",
- " roc_curve,precision_score,\n",
- " precision_recall_curve,roc_auc_score,\n",
- " f1_score,accuracy_score, confusion_matrix, ConfusionMatrixDisplay,\n",
- " confusion_matrix,\n",
- " average_precision_score,\n",
- " recall_score\n",
- ")\n",
- "import pickle\n",
- "from sklearn.utils import class_weight\n",
- "import shap\n",
- "# from keras_sequential_ascii import keras2ascii\n",
- "# from nnv import NNV\n"
+ "X_train.columns.to_list()"
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "880823ee",
+ "execution_count": 5,
+ "id": "9f6eef91",
"metadata": {},
"outputs": [],
"source": [
- "warnings.simplefilter(\"ignore\", category=DeprecationWarning)\n"
+ "X_train[['chrom', 'pos', 'ref_base', 'alt_base','class']].drop_duplicates().to_csv(\"../../data/training_variants.csv\", index=False)"
]
},
{
"cell_type": "code",
- "execution_count": 4,
- "id": "22e98d97",
+ "execution_count": 7,
+ "id": "0a8776c9",
"metadata": {},
"outputs": [],
"source": [
- "with open(\n",
- " \"../../configs/col_config.yaml\"\n",
- " ) as fh:\n",
- " config_dict = yaml.safe_load(fh)\n",
- "\n",
- "with open(\n",
- " \"../../configs/var_class.yaml\"\n",
- " ) as fh1:\n",
- " var_dict = yaml.safe_load(fh1)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'3 prime UTR',\n",
- " '5 prime UTR',\n",
- " 'complex substitution',\n",
- " 'exon loss variant',\n",
- " 'frameshift elongation',\n",
- " 'frameshift truncation',\n",
- " 'inframe deletion',\n",
- " 'inframe insertion',\n",
- " 'intergenic',\n",
- " 'intron',\n",
- " 'missense',\n",
- " 'other',\n",
- " 'other RNA',\n",
- " 'splice site',\n",
- " 'start lost',\n",
- " 'start retained',\n",
- " 'stop gained',\n",
- " 'stop lost',\n",
- " 'stop retained',\n",
- " 'synonymous'}"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "set(var_dict.values())\n"
+ "#X_train.chrom.value_counts()"
]
},
{
"cell_type": "code",
- "execution_count": 11,
- "id": "3a4ff3de",
+ "execution_count": 8,
+ "id": "eeae3ed6",
"metadata": {},
"outputs": [],
"source": [
- "#amis = pd.read_csv(\"/Users/tarunmamidi/Downloads/AlphaMissense_hg38.tsv\", low_memory=False, skiprows=3, sep='\\t')\n",
- "#amis.head()\n"
+ "#X_train[X_train.chrom == \"chrY\"]"
]
},
{
"cell_type": "code",
"execution_count": 5,
- "id": "4bcc801e",
+ "id": "bd9d4901",
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2024-02-28 16:41:02.474929: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA\n",
- "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Model: \"sequential\"\n",
- "_________________________________________________________________\n",
- " Layer (type) Output Shape Param # \n",
- "=================================================================\n",
- " dense (Dense) (None, 239) 57360 \n",
- " \n",
- " dense_l0 (Dense) (None, 161) 38640 \n",
- " \n",
- " dropout (Dropout) (None, 161) 0 \n",
- " \n",
- " dense_last (Dense) (None, 1) 162 \n",
- " \n",
- "=================================================================\n",
- "Total params: 96,162\n",
- "Trainable params: 96,162\n",
- "Non-trainable params: 0\n",
- "_________________________________________________________________\n"
+ "1/1 [==============================] - 0s 137ms/step\n",
+ "[0.94781121]\n"
]
}
],
"source": [
- "clf = keras.models.load_model('../../model/Neural_network/')\n",
- "clf.load_weights(\"../../model/weights.h5\")\n",
- "clf.summary()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [],
- "source": [
- "#keras2ascii(clf)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [],
- "source": [
- "# layersList = [\n",
- "# {\"title\":\"Input\\n(239 n)\\n(elu)\", \"units\": 239, \"color\": \"green\", \"edges_color\":\"darkBlue\", \"edges_width\":2},\n",
- "# {\"title\":\"Dense\\n(161 n)\\n(elu)\", \"units\": 161, \"edges_color\":\"darkBlue\", \"edges_width\":2,\"color\": \"orange\"},\n",
- "# #{\"title\":\"Dropout\", \"units\": 161, \"edges_color\":\"red\", \"edges_width\":2},\n",
- "# {\"title\":\"output\\n(1 n)\\n(sigmoid)\", \"units\": 1,\"color\": \"red\"},\n",
- "# ]\n",
- "\n",
- "# NNV(layersList).render(save_to_file=\"DITTO.png\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "276a7133",
- "metadata": {},
- "outputs": [],
- "source": [
- "#X_train = pd.read_csv(f\"../../data/train_class_data_80.csv.gz\")\n",
- "#train.head()"
+ "pkl_file = open(\n",
+ " \"../../data/background.pkl\",\n",
+ " \"rb\",\n",
+ " )\n",
+ "background = pickle.load(pkl_file)\n",
+ "pkl_file.close()\n",
+ "explainer = shap.KernelExplainer(clf.predict, background)\n",
+ "print(explainer.expected_value)\n",
+ "del background\n"
]
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "0a8776c9",
+ "execution_count": 10,
+ "id": "475cd807",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(208167, 255)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(208167, 255)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#X_train.chrom.value_counts()"
+ "X_test = pd.read_csv(\"../../data/test_class_data_20.csv.gz\")\n",
+ "X_test.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
- "id": "eeae3ed6",
+ "id": "c0ae79e4",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(1050826, 255)\n"
+ ]
+ }
+ ],
"source": [
- "#X_train[X_train.chrom == \"chrY\"]"
+ "X_test = pd.concat([X_test, X_train]).reset_index(drop=True)\n",
+ "print(X_test.shape)\n"
]
},
{
"cell_type": "code",
- "execution_count": 9,
- "id": "bd9d4901",
+ "execution_count": 11,
+ "id": "fa7f5c32",
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1/1 [==============================] - 0s 272ms/step\n",
- "[0.94781121]\n"
- ]
+ "data": {
+ "text/plain": [
+ "class\n",
+ "low_impact 147809\n",
+ "high_impact 60358\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "pkl_file = open(\n",
- " \"../../data/background.pkl\",\n",
- " \"rb\",\n",
- " )\n",
- "background = pickle.load(pkl_file)\n",
- "pkl_file.close()\n",
- "explainer = shap.KernelExplainer(clf.predict, background)\n",
- "print(explainer.expected_value)\n",
- "del background\n"
+ "X_test['class'].value_counts()"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 7,
"id": "9f23ed49",
"metadata": {},
"outputs": [],
@@ -293,7 +886,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"id": "c802b77d",
"metadata": {},
"outputs": [
@@ -527,7 +1120,7 @@
"[5 rows x 239 columns]"
]
},
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -538,7 +1131,7 @@
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -704,7 +1297,7 @@
"4 8831.0 NaN Q96PV0 rs781201249 "
]
},
- "execution_count": 49,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -741,7 +1334,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 8,
"id": "baf8c805",
"metadata": {},
"outputs": [],
@@ -782,7 +1375,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 8,
"id": "f1f0f9bd",
"metadata": {},
"outputs": [
@@ -920,22 +1513,22 @@
""
],
"text/plain": [
- " transcript gene consequence \\\n",
- "0 ENST00000293748 SYNGAP1 NMD_transcript_variant,synonymous_variant \n",
+ " transcript gene consequence \n",
+ "0 ENST00000293748 SYNGAP1 NMD_transcript_variant,synonymous_variant \\\n",
"1 ENST00000418600 SYNGAP1 synonymous_variant \n",
"2 ENST00000428982 SYNGAP1 synonymous_variant \n",
"3 ENST00000449372 SYNGAP1 synonymous_variant \n",
"4 ENST00000628646 SYNGAP1 synonymous_variant \n",
"\n",
- " protein_hgvs cdna_hgvs chrom pos ref_base alt_base \\\n",
- "0 p.Pro1051= c.3153T>G chr6 33443750 T G \n",
+ " protein_hgvs cdna_hgvs chrom pos ref_base alt_base \n",
+ "0 p.Pro1051= c.3153T>G chr6 33443750 T G \\\n",
"1 p.Pro1066= c.3198T>G chr6 33443750 T G \n",
"2 p.Pro1007= c.3021T>G chr6 33443750 T G \n",
"3 p.Pro1052= c.3156T>G chr6 33443750 T G \n",
"4 p.Pro1066= c.3198T>G chr6 33443750 T G \n",
"\n",
- " clingen.disease clingen.classification \\\n",
- "0 complex neurodevelopmental disorder Definitive \n",
+ " clingen.disease clingen.classification \n",
+ "0 complex neurodevelopmental disorder Definitive \\\n",
"1 complex neurodevelopmental disorder Definitive \n",
"2 complex neurodevelopmental disorder Definitive \n",
"3 complex neurodevelopmental disorder Definitive \n",
@@ -949,7 +1542,7 @@
"4 8831.0 NaN Q96PV0 rs781201249 "
]
},
- "execution_count": 9,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -1232,7 +1825,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 9,
"id": "6fc8e643",
"metadata": {},
"outputs": [
@@ -1250,7 +1843,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 10,
"id": "45412118",
"metadata": {},
"outputs": [
@@ -1266,7 +1859,7 @@
" [1.0000000e+00]], dtype=float32)"
]
},
- "execution_count": 14,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -1347,7 +1940,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -1361,7 +1954,7 @@
"Name: spliceai, dtype: float64"
]
},
- "execution_count": 15,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -1671,7 +2264,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 12,
"id": "dcc03dc4",
"metadata": {},
"outputs": [
@@ -1844,7 +2437,7 @@
"4 8831.0 NaN Q96PV0 rs781201249 0.0 "
]
},
- "execution_count": 16,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -1856,7 +2449,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 13,
"id": "a04829c0",
"metadata": {},
"outputs": [
@@ -1866,7 +2459,7 @@
"(208167, 257)"
]
},
- "execution_count": 17,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -1878,7 +2471,7 @@
},
{
"cell_type": "code",
- "execution_count": 61,
+ "execution_count": 18,
"id": "bd40b6e3",
"metadata": {},
"outputs": [
@@ -1914,6 +2507,7 @@
" alt_base | \n",
" clingen.disease | \n",
" ... | \n",
+ " mutationtaster.prediction_Damaging | \n",
" mutationtaster.prediction_Polymorphism | \n",
" mutationtaster.model_complex_aae | \n",
" mutationtaster.model_simple_aae | \n",
@@ -1923,7 +2517,6 @@
" sift.confidence_High | \n",
" sift.confidence_Low | \n",
" class | \n",
- " spliceai | \n",
" \n",
" \n",
" \n",
@@ -1946,10 +2539,10 @@
" 0 | \n",
" 0 | \n",
" 0 | \n",
+ " 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
- " 0.0004 | \n",
" \n",
" \n",
" 1 | \n",
@@ -1970,10 +2563,10 @@
" 0 | \n",
" 0 | \n",
" 0 | \n",
+ " 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
- " 0.0004 | \n",
"
\n",
" \n",
" 2 | \n",
@@ -1994,10 +2587,10 @@
" 0 | \n",
" 0 | \n",
" 0 | \n",
+ " 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
- " 0.0004 | \n",
"
\n",
" \n",
" 3 | \n",
@@ -2018,10 +2611,10 @@
" 0 | \n",
" 0 | \n",
" 0 | \n",
+ " 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
- " 0.0004 | \n",
"
\n",
" \n",
" 4 | \n",
@@ -2042,76 +2635,440 @@
" 0 | \n",
" 0 | \n",
" 0 | \n",
+ " 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
- " 0.0004 | \n",
"
\n",
" \n",
"\n",
- "5 rows × 257 columns
\n",
+ "5 rows × 256 columns
\n",
+ ""
+ ],
+ "text/plain": [
+ " transcript gene consequence \n",
+ "0 ENST00000293748 SYNGAP1 NMD_transcript_variant,synonymous_variant \\\n",
+ "1 ENST00000418600 SYNGAP1 synonymous_variant \n",
+ "2 ENST00000428982 SYNGAP1 synonymous_variant \n",
+ "3 ENST00000449372 SYNGAP1 synonymous_variant \n",
+ "4 ENST00000628646 SYNGAP1 synonymous_variant \n",
+ "\n",
+ " protein_hgvs cdna_hgvs chrom pos ref_base alt_base \n",
+ "0 p.Pro1051= c.3153T>G chr6 33443750 T G \\\n",
+ "1 p.Pro1066= c.3198T>G chr6 33443750 T G \n",
+ "2 p.Pro1007= c.3021T>G chr6 33443750 T G \n",
+ "3 p.Pro1052= c.3156T>G chr6 33443750 T G \n",
+ "4 p.Pro1066= c.3198T>G chr6 33443750 T G \n",
+ "\n",
+ " clingen.disease ... \n",
+ "0 complex neurodevelopmental disorder ... \\\n",
+ "1 complex neurodevelopmental disorder ... \n",
+ "2 complex neurodevelopmental disorder ... \n",
+ "3 complex neurodevelopmental disorder ... \n",
+ "4 complex neurodevelopmental disorder ... \n",
+ "\n",
+ " mutationtaster.prediction_Damaging mutationtaster.prediction_Polymorphism \n",
+ "0 0 0 \\\n",
+ "1 0 0 \n",
+ "2 0 0 \n",
+ "3 0 0 \n",
+ "4 0 0 \n",
+ "\n",
+ " mutationtaster.model_complex_aae mutationtaster.model_simple_aae \n",
+ "0 0 0 \\\n",
+ "1 0 0 \n",
+ "2 0 0 \n",
+ "3 0 0 \n",
+ "4 0 0 \n",
+ "\n",
+ " mutationtaster.model_without_aae prec.stat_lof-tolerant \n",
+ "0 0 0 \\\n",
+ "1 0 0 \n",
+ "2 0 0 \n",
+ "3 0 0 \n",
+ "4 0 0 \n",
+ "\n",
+ " prec.stat_recessive sift.confidence_High sift.confidence_Low class \n",
+ "0 0 1 0 0 \n",
+ "1 0 1 0 0 \n",
+ "2 0 1 0 0 \n",
+ "3 0 1 0 0 \n",
+ "4 0 1 0 0 \n",
+ "\n",
+ "[5 rows x 256 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "var.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "a651a2bc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "var[['chrom', 'pos', 'ref_base', 'alt_base','class','DITTO']].sort_values(by='DITTO', ascending=False).drop_duplicates(['chrom', 'pos', 'ref_base', 'alt_base']).to_csv(\"../../data/testing_variants.csv\", index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "92f60d7b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4739"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(var.gene.unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "e9e88795",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DITTO | \n",
+ " class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
"
"
],
"text/plain": [
- " transcript gene consequence \n",
- "0 ENST00000293748 SYNGAP1 NMD_transcript_variant,synonymous_variant \\\n",
- "1 ENST00000418600 SYNGAP1 synonymous_variant \n",
- "2 ENST00000428982 SYNGAP1 synonymous_variant \n",
- "3 ENST00000449372 SYNGAP1 synonymous_variant \n",
- "4 ENST00000628646 SYNGAP1 synonymous_variant \n",
- "\n",
- " protein_hgvs cdna_hgvs chrom pos ref_base alt_base \n",
- "0 p.Pro1051= c.3153T>G chr6 33443750 T G \\\n",
- "1 p.Pro1066= c.3198T>G chr6 33443750 T G \n",
- "2 p.Pro1007= c.3021T>G chr6 33443750 T G \n",
- "3 p.Pro1052= c.3156T>G chr6 33443750 T G \n",
- "4 p.Pro1066= c.3198T>G chr6 33443750 T G \n",
- "\n",
- " clingen.disease ... \n",
- "0 complex neurodevelopmental disorder ... \\\n",
- "1 complex neurodevelopmental disorder ... \n",
- "2 complex neurodevelopmental disorder ... \n",
- "3 complex neurodevelopmental disorder ... \n",
- "4 complex neurodevelopmental disorder ... \n",
- "\n",
- " mutationtaster.prediction_Polymorphism mutationtaster.model_complex_aae \n",
- "0 0 0 \\\n",
- "1 0 0 \n",
- "2 0 0 \n",
- "3 0 0 \n",
- "4 0 0 \n",
- "\n",
- " mutationtaster.model_simple_aae mutationtaster.model_without_aae \n",
- "0 0 0 \\\n",
- "1 0 0 \n",
- "2 0 0 \n",
- "3 0 0 \n",
- "4 0 0 \n",
- "\n",
- " prec.stat_lof-tolerant prec.stat_recessive sift.confidence_High \n",
- "0 0 0 1 \\\n",
- "1 0 0 1 \n",
- "2 0 0 1 \n",
- "3 0 0 1 \n",
- "4 0 0 1 \n",
- "\n",
- " sift.confidence_Low class spliceai \n",
- "0 0 0 0.0004 \n",
- "1 0 0 0.0004 \n",
- "2 0 0 0.0004 \n",
- "3 0 0 0.0004 \n",
- "4 0 0 0.0004 \n",
- "\n",
- "[5 rows x 257 columns]"
+ " DITTO class\n",
+ "0 0.0 0\n",
+ "1 0.0 0\n",
+ "2 0.0 0\n",
+ "3 0.0 0\n",
+ "4 0.0 0"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "var[['DITTO','class']].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "97df8911",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(60358, 257)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "var[var['class'] == 1].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "f6d36c30",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAGhCAYAAAB8lIA8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAyjklEQVR4nO3dfXhU5Z3/8c+QhzFkk2kgJkMKItaAYMBqWEOgLSgQqAmpdntpGx3QUtSLSkhJFqHdrbBrgYLG1qUCtRZqRWMr0u0umCZWmxp5jqQ1xKdVBIIJQZlMQoQkJPfvD8v5OQnFkyHJTOj7dV3nj3Of78z5nvuiPR/vOTNxGGOMAAAAcF4Dgt0AAABAf0BoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALAh6KHp6NGjuuOOOzR48GANHDhQX/ziF1VRUWEdN8Zo2bJlSkpKUlRUlKZMmaIDBw74vUdLS4sWLFig+Ph4RUdHKzs7WzU1NX41Xq9XHo9HLpdLLpdLHo9HDQ0NfjWHDx/WrFmzFB0drfj4eOXm5qq1tbXXrh0AAPQfQQ1NXq9XkyZNUkREhF544QVVV1fr4Ycf1uc+9zmrZvXq1SosLNTatWu1d+9eud1uTZ8+XU1NTVZNXl6etm7dqqKiIpWXl+vkyZPKyspSe3u7VZOTk6PKykoVFxeruLhYlZWV8ng81vH29nZlZmaqublZ5eXlKioq0pYtW5Sfn98ncwEAAEKbI5h/sHfJkiV69dVX9corr5zzuDFGSUlJysvL0/333y/pk1WlxMRE/fjHP9Y999wjn8+nSy+9VL/+9a912223SZI++OADDRs2TNu3b9eMGTP0xhtvaMyYMdq1a5fS0tIkSbt27VJ6errefPNNjRo1Si+88IKysrJ05MgRJSUlSZKKiop05513qr6+XrGxsZ95PR0dHfrggw8UExMjh8PRE1MEAAB6mTFGTU1NSkpK0oAB51lPMkE0evRok5eXZ77xjW+YSy+91Hzxi180P//5z63j7777rpFkXnvtNb/XZWdnm9mzZxtjjPnjH/9oJJkTJ0741YwbN8788Ic/NMYY88QTTxiXy9Xl/C6Xy/zyl780xhjz7//+72bcuHF+x0+cOGEkmZdeeumc/Z8+fdr4fD5rq66uNpLY2NjY2NjY+uF25MiR8+aWcAXRe++9p3Xr1mnRokX6/ve/rz179ig3N1dOp1OzZ89WXV2dJCkxMdHvdYmJiTp06JAkqa6uTpGRkYqLi+tSc/b1dXV1SkhI6HL+hIQEv5rO54mLi1NkZKRV09nKlSu1fPnyLuNHjhyxtTIFAACCr7GxUcOGDVNMTMx564Iamjo6OjR+/HitWLFCknTttdfqwIEDWrdunWbPnm3Vdf6oyxjzmR9/da45V30gNZ+2dOlSLVq0yNo/O+mxsbGEJgAA+pnPyhZBfRB8yJAhGjNmjN/Y6NGjdfjwYUmS2+2WpC4rPfX19daqkNvtVmtrq7xe73lrjh071uX8x48f96vpfB6v16u2trYuK1BnOZ1OKyARlAAAuLgFNTRNmjRJb731lt/Y22+/reHDh0uSRowYIbfbrdLSUut4a2urysrKNHHiRElSamqqIiIi/Gpqa2tVVVVl1aSnp8vn82nPnj1Wze7du+Xz+fxqqqqqVFtba9WUlJTI6XQqNTW1h68cAAD0O+d94qmX7dmzx4SHh5sf/ehH5p133jGbN282AwcONE899ZRVs2rVKuNyuczzzz9vXn/9dfOtb33LDBkyxDQ2Nlo19957rxk6dKh58cUXzWuvvWZuvPFGc80115gzZ85YNTNnzjTjxo0zO3fuNDt37jRjx441WVlZ1vEzZ86YlJQUM3XqVPPaa6+ZF1980QwdOtTcd999tq/H5/MZScbn813gzAAAgL5i9/4d1NBkjDH/8z//Y1JSUozT6TRXXXWV37fnjDGmo6PDPPDAA8btdhun02m+8pWvmNdff92v5tSpU+a+++4zgwYNMlFRUSYrK8scPnzYr+ajjz4yt99+u4mJiTExMTHm9ttvN16v16/m0KFDJjMz00RFRZlBgwaZ++67z5w+fdr2tRCaAADof+zev4P6O00Xm8bGRrlcLvl8Pp5vAgCgn7B7/w76n1EBAADoDwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABvCg90AAADAuVy+ZJvf/vurMoPUySdYaQIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2BDU0LRs2TI5HA6/ze12W8eNMVq2bJmSkpIUFRWlKVOm6MCBA37v0dLSogULFig+Pl7R0dHKzs5WTU2NX43X65XH45HL5ZLL5ZLH41FDQ4NfzeHDhzVr1ixFR0crPj5eubm5am1t7bVrBwAA/UvQV5quvvpq1dbWWtvrr79uHVu9erUKCwu1du1a7d27V263W9OnT1dTU5NVk5eXp61bt6qoqEjl5eU6efKksrKy1N7ebtXk5OSosrJSxcXFKi4uVmVlpTwej3W8vb1dmZmZam5uVnl5uYqKirRlyxbl5+f3zSQAAICQFx70BsLD/VaXzjLG6Cc/+Yl+8IMf6Otf/7ok6Ve/+pUSExP19NNP65577pHP59MTTzyhX//615o2bZok6amnntKwYcP04osvasaMGXrjjTdUXFysXbt2KS0tTZL0+OOPKz09XW+99ZZGjRqlkpISVVdX68iRI0pKSpIkPfzww7rzzjv1ox/9SLGxsX00GwAAIFQFfaXpnXfeUVJSkkaMGKFvfvObeu+99yRJBw8eVF1dnTIyMqxap9OpyZMna8eOHZKkiooKtbW1+dUkJSUpJSXFqtm5c6dcLpcVmCRpwoQJcrlcfjUpKSlWYJKkGTNmqKWlRRUVFX+395aWFjU2NvptAADg4hTU0JSWlqYnn3xSf/jDH/T444+rrq5OEydO1EcffaS6ujpJUmJiot9rEhMTrWN1dXWKjIxUXFzceWsSEhK6nDshIcGvpvN54uLiFBkZadWcy8qVK63npFwul4YNG9bNGQAAAP1FUEPTV7/6Vf3Lv/yLxo4dq2nTpmnbtm2SPvkY7iyHw+H3GmNMl7HOOtecqz6Qms6WLl0qn89nbUeOHDlvXwAAoP8K+sdznxYdHa2xY8fqnXfesZ5z6rzSU19fb60Kud1utba2yuv1nrfm2LFjXc51/Phxv5rO5/F6vWpra+uyAvVpTqdTsbGxfhsAALg4hVRoamlp0RtvvKEhQ4ZoxIgRcrvdKi0ttY63traqrKxMEydOlCSlpqYqIiLCr6a2tlZVVVVWTXp6unw+n/bs2WPV7N69Wz6fz6+mqqpKtbW1Vk1JSYmcTqdSU1N79ZoBAED/ENRvzxUUFGjWrFm67LLLVF9frwcffFCNjY2aM2eOHA6H8vLytGLFCiUnJys5OVkrVqzQwIEDlZOTI0lyuVyaO3eu8vPzNXjwYA0aNEgFBQXWx32SNHr0aM2cOVPz5s3Thg0bJEl33323srKyNGrUKElSRkaGxowZI4/HozVr1ujEiRMqKCjQvHnzWD0CAACSghyaampq9K1vfUsffvihLr30Uk2YMEG7du3S8OHDJUmLFy/WqVOnNH/+fHm9XqWlpamkpEQxMTHWezzyyCMKDw/XrbfeqlOnTmnq1KnatGmTwsLCrJrNmzcrNzfX+pZddna21q5dax0PCwvTtm3bNH/+fE2aNElRUVHKycnRQw891EczAQAAQp3DGGOC3cTForGxUS6XSz6fjxUqAAAu0OVLtvntv78qs1fOY/f+HVLPNAEAAIQqQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbQiY0rVy5Ug6HQ3l5edaYMUbLli1TUlKSoqKiNGXKFB04cMDvdS0tLVqwYIHi4+MVHR2t7Oxs1dTU+NV4vV55PB65XC65XC55PB41NDT41Rw+fFizZs1SdHS04uPjlZubq9bW1t66XAAA0M+ERGjau3evfv7zn2vcuHF+46tXr1ZhYaHWrl2rvXv3yu12a/r06WpqarJq8vLytHXrVhUVFam8vFwnT55UVlaW2tvbrZqcnBxVVlaquLhYxcXFqqyslMfjsY63t7crMzNTzc3NKi8vV1FRkbZs2aL8/Pzev3gAANA/mCBramoyycnJprS01EyePNksXLjQGGNMR0eHcbvdZtWqVVbt6dOnjcvlMuvXrzfGGNPQ0GAiIiJMUVGRVXP06FEzYMAAU1xcbIwxprq62kgyu3btsmp27txpJJk333zTGGPM9u3bzYABA8zRo0etmmeeecY4nU7j8/lsX4vP5zOSuvUaAABwbsPv/1+/rbfYvX8HfaXpu9/9rjIzMzVt2jS/8YMHD6qurk4ZGRnWmNPp1OTJk7Vjxw5JUkVFhdra2vxqkpKSlJKSYtXs3LlTLpdLaWlpVs2ECRPkcrn8alJSUpSUlGTVzJgxQy0tLaqoqPi7vbe0tKixsdFvAwAAF6fwYJ68qKhIr732mvbu3dvlWF1dnSQpMTHRbzwxMVGHDh2yaiIjIxUXF9el5uzr6+rqlJCQ0OX9ExIS/Go6nycuLk6RkZFWzbmsXLlSy5cv/6zLBAAAF4GgrTQdOXJECxcu1FNPPaVLLrnk79Y5HA6/fWNMl7HOOtecqz6Qms6WLl0qn89nbUeOHDlvXwAAoP8KWmiqqKhQfX29UlNTFR4ervDwcJWVlenRRx9VeHi4tfLTeaWnvr7eOuZ2u9Xa2iqv13vemmPHjnU5//Hjx/1qOp/H6/Wqra2tywrUpzmdTsXGxvptAADg4hS00DR16lS9/vrrqqystLbx48fr9ttvV2Vlpa644gq53W6VlpZar2ltbVVZWZkmTpwoSUpNTVVERIRfTW1traqqqqya9PR0+Xw+7dmzx6rZvXu3fD6fX01VVZVqa2utmpKSEjmdTqWmpvbqPAAAgP4haM80xcTEKCUlxW8sOjpagwcPtsbz8vK0YsUKJScnKzk5WStWrNDAgQOVk5MjSXK5XJo7d67y8/M1ePBgDRo0SAUFBRo7dqz1YPno0aM1c+ZMzZs3Txs2bJAk3X333crKytKoUaMkSRkZGRozZow8Ho/WrFmjEydOqKCgQPPmzWP1CAAASAryg+CfZfHixTp16pTmz58vr9ertLQ0lZSUKCYmxqp55JFHFB4erltvvVWnTp3S1KlTtWnTJoWFhVk1mzdvVm5urvUtu+zsbK1du9Y6HhYWpm3btmn+/PmaNGmSoqKilJOTo4ceeqjvLhYAAIQ0hzHGBLuJi0VjY6NcLpd8Ph8rVAAAXKDLl2zz239/VWavnMfu/Tvov9MEAADQHxCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgQ0Ch6eDBgz3dBwAAQEgLKDRdeeWVuuGGG/TUU0/p9OnTPd0TAABAyAkoNP3lL3/Rtddeq/z8fLndbt1zzz3as2dPT/cGAAAQMgIKTSkpKSosLNTRo0e1ceNG1dXV6Utf+pKuvvpqFRYW6vjx4z3dJwAAQFBd0IPg4eHhuuWWW/Sb3/xGP/7xj/Xuu++qoKBAQ4cO1ezZs1VbW9tTfQIAAATVBYWmffv2af78+RoyZIgKCwtVUFCgd999Vy+99JKOHj2qr33taz3VJwAAQFCFB/KiwsJCbdy4UW+99ZZuuukmPfnkk7rppps0YMAnGWzEiBHasGGDrrrqqh5tFgAAIFgCCk3r1q3Tt7/9bd11111yu93nrLnsssv0xBNPXFBzAAAAoSKg0PTOO+98Zk1kZKTmzJkTyNsDAACEnICeadq4caN++9vfdhn/7W9/q1/96lcX3BQAAECoCSg0rVq1SvHx8V3GExIStGLFigtuCgAAINQEFJoOHTqkESNGdBkfPny4Dh8+fMFNAQAAhJqAQlNCQoL++te/dhn/y1/+osGDB19wUwAAAKEmoND0zW9+U7m5uXr55ZfV3t6u9vZ2vfTSS1q4cKG++c1v9nSPAAAAQRfQt+cefPBBHTp0SFOnTlV4+Cdv0dHRodmzZ/NMEwAAuCgFFJoiIyP17LPP6j//8z/1l7/8RVFRURo7dqyGDx/e0/0BAACEhIBC01kjR47UyJEje6oXAACAkBVQaGpvb9emTZv0xz/+UfX19ero6PA7/tJLL/VIcwAAAKEioNC0cOFCbdq0SZmZmUpJSZHD4ejpvgAAAEJKQKGpqKhIv/nNb3TTTTf1dD8AAAAhKaCfHIiMjNSVV17Z070AAACErIBCU35+vn7605/KGNPT/QAAAISkgD6eKy8v18svv6wXXnhBV199tSIiIvyOP//88z3SHAAAQKgIKDR97nOf0y233NLTvQAAAISsgELTxo0be7oPAACAkBbQM02SdObMGb344ovasGGDmpqaJEkffPCBTp482WPNAQAAhIqAVpoOHTqkmTNn6vDhw2ppadH06dMVExOj1atX6/Tp01q/fn1P9wkAABBUAa00LVy4UOPHj5fX61VUVJQ1fsstt+iPf/xjjzUHAAAQKgL+9tyrr76qyMhIv/Hhw4fr6NGjPdIYAABAKAlopamjo0Pt7e1dxmtqahQTE2P7fdatW6dx48YpNjZWsbGxSk9P1wsvvGAdN8Zo2bJlSkpKUlRUlKZMmaIDBw74vUdLS4sWLFig+Ph4RUdHKzs7WzU1NX41Xq9XHo9HLpdLLpdLHo9HDQ0NfjWHDx/WrFmzFB0drfj4eOXm5qq1tdX2tQAAgItbQKFp+vTp+slPfmLtOxwOnTx5Ug888EC3/rTK0KFDtWrVKu3bt0/79u3TjTfeqK997WtWMFq9erUKCwu1du1a7d27V263W9OnT7cePJekvLw8bd26VUVFRSovL9fJkyeVlZXlF+pycnJUWVmp4uJiFRcXq7KyUh6Pxzre3t6uzMxMNTc3q7y8XEVFRdqyZYvy8/MDmR4AAHAxMgE4evSoGTlypBk9erQJDw83EyZMMIMHDzajRo0yx44dC+QtLXFxceYXv/iF6ejoMG6326xatco6dvr0aeNyucz69euNMcY0NDSYiIgIU1RU5NfbgAEDTHFxsTHGmOrqaiPJ7Nq1y6rZuXOnkWTefPNNY4wx27dvNwMGDDBHjx61ap555hnjdDqNz+ez3bvP5zOSuvUaAABwbsPv/1+/rbfYvX8HtNKUlJSkyspKFRQU6J577tG1116rVatWaf/+/UpISAgovLW3t6uoqEjNzc1KT0/XwYMHVVdXp4yMDKvG6XRq8uTJ2rFjhySpoqJCbW1tfjVJSUlKSUmxanbu3CmXy6W0tDSrZsKECXK5XH41KSkpSkpKsmpmzJihlpYWVVRU/N2eW1pa1NjY6LcBAICLU0APgktSVFSUvv3tb+vb3/72BTXw+uuvKz09XadPn9Y//dM/aevWrRozZowVaBITE/3qExMTdejQIUlSXV2dIiMjFRcX16Wmrq7OqjlXkEtISPCr6XyeuLg4RUZGWjXnsnLlSi1fvrybVwwAAPqjgELTk08+ed7js2fPtv1eo0aNUmVlpRoaGrRlyxbNmTNHZWVl1nGHw+FXb4zpMtZZ55pz1QdS09nSpUu1aNEia7+xsVHDhg07b28AAKB/Cig0LVy40G+/ra1NH3/8sSIjIzVw4MBuhabIyEhdeeWVkqTx48dr7969+ulPf6r7779f0ierQEOGDLHq6+vrrVUht9ut1tZWeb1ev9Wm+vp6TZw40ao5duxYl/MeP37c7312797td9zr9aqtra3LCtSnOZ1OOZ1O29cKAAD6r4CeafJ6vX7byZMn9dZbb+lLX/qSnnnmmQtqyBijlpYWjRgxQm63W6Wlpdax1tZWlZWVWYEoNTVVERERfjW1tbWqqqqyatLT0+Xz+bRnzx6rZvfu3fL5fH41VVVVqq2ttWpKSkrkdDqVmpp6QdcDAAAuDgE/09RZcnKyVq1apTvuuENvvvmmrdd8//vf11e/+lUNGzZMTU1NKioq0p/+9CcVFxfL4XAoLy9PK1asUHJyspKTk7VixQoNHDhQOTk5kiSXy6W5c+cqPz9fgwcP1qBBg1RQUKCxY8dq2rRpkqTRo0dr5syZmjdvnjZs2CBJuvvuu5WVlaVRo0ZJkjIyMjRmzBh5PB6tWbNGJ06cUEFBgebNm6fY2NiemiIAANCP9VhokqSwsDB98MEHtuuPHTsmj8ej2tpauVwujRs3TsXFxZo+fbokafHixTp16pTmz58vr9ertLQ0lZSU+P2A5iOPPKLw8HDdeuutOnXqlKZOnapNmzYpLCzMqtm8ebNyc3Otb9llZ2dr7dq1fn1v27ZN8+fP16RJkxQVFaWcnBw99NBDFzolAADgIuEwxpjuvuj3v/+9374xRrW1tVq7dq2GDRvm96ve/0gaGxvlcrnk8/lYoQIA4AJdvmSb3/77qzJ75Tx2798BrTTdfPPNfvsOh0OXXnqpbrzxRj388MOBvCUAAEBICyg0dXR09HQfAAAAIS2gb88BAAD8owlopenTP+j4WQoLCwM5BQAAQEgJKDTt379fr732ms6cOWN9bf/tt99WWFiYrrvuOqvus365GwAAoL8IKDTNmjVLMTEx+tWvfmX9ErfX69Vdd92lL3/5y8rPz+/RJgEAAIItoGeaHn74Ya1cudLvT5fExcXpwQcf5NtzAADgohRQaGpsbDzn33Orr69XU1PTBTcFAAAQagIKTbfccovuuusuPffcc6qpqVFNTY2ee+45zZ07V1//+td7ukcAAICgC+iZpvXr16ugoEB33HGH2traPnmj8HDNnTtXa9as6dEGAQAAQkFAoWngwIF67LHHtGbNGr377rsyxujKK69UdHR0T/cHAAAQEi7oxy1ra2tVW1urkSNHKjo6WgH8GTsAAIB+IaDQ9NFHH2nq1KkaOXKkbrrpJtXW1kqSvvOd7/BzAwAA4KIUUGj63ve+p4iICB0+fFgDBw60xm+77TYVFxf3WHMAAAChIqBnmkpKSvSHP/xBQ4cO9RtPTk7WoUOHeqQxAACAUBLQSlNzc7PfCtNZH374oZxO5wU3BQAAEGoCCk1f+cpX9OSTT1r7DodDHR0dWrNmjW644YYeaw4AACBUBPTx3Jo1azRlyhTt27dPra2tWrx4sQ4cOKATJ07o1Vdf7ekeAQAAgi6glaYxY8bor3/9q66//npNnz5dzc3N+vrXv679+/frC1/4Qk/3CAAAEHTdXmlqa2tTRkaGNmzYoOXLl/dGTwAAACGn2ytNERERqqqqksPh6I1+AAAAQlJAH8/Nnj1bTzzxRE/3AgAAELICehC8tbVVv/jFL1RaWqrx48d3+ZtzhYWFPdIcAABAqOhWaHrvvfd0+eWXq6qqStddd50k6e233/ar4WM7AABwMepWaEpOTlZtba1efvllSZ/82ZRHH31UiYmJvdIcAABAqOjWM03GGL/9F154Qc3NzT3aEAAAQCgK6EHwszqHKAAAgItVt0KTw+Ho8swSzzABAIB/BN16pskYozvvvNP6o7ynT5/Wvffe2+Xbc88//3zPdQgAABACuhWa5syZ47d/xx139GgzAAAAoapboWnjxo291QcAAEBIu6AHwQEAAP5REJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALAhqKFp5cqV+ud//mfFxMQoISFBN998s9566y2/GmOMli1bpqSkJEVFRWnKlCk6cOCAX01LS4sWLFig+Ph4RUdHKzs7WzU1NX41Xq9XHo9HLpdLLpdLHo9HDQ0NfjWHDx/WrFmzFB0drfj4eOXm5qq1tbVXrh0AAPQvQQ1NZWVl+u53v6tdu3aptLRUZ86cUUZGhpqbm62a1atXq7CwUGvXrtXevXvldrs1ffp0NTU1WTV5eXnaunWrioqKVF5erpMnTyorK0vt7e1WTU5OjiorK1VcXKzi4mJVVlbK4/FYx9vb25WZmanm5maVl5erqKhIW7ZsUX5+ft9MBgAACG0mhNTX1xtJpqyszBhjTEdHh3G73WbVqlVWzenTp43L5TLr1683xhjT0NBgIiIiTFFRkVVz9OhRM2DAAFNcXGyMMaa6utpIMrt27bJqdu7caSSZN9980xhjzPbt282AAQPM0aNHrZpnnnnGOJ1O4/P5ztnv6dOnjc/ns7YjR44YSX+3HgAA2Df8/v/123qLz+ezdf8OqWeafD6fJGnQoEGSpIMHD6qurk4ZGRlWjdPp1OTJk7Vjxw5JUkVFhdra2vxqkpKSlJKSYtXs3LlTLpdLaWlpVs2ECRPkcrn8alJSUpSUlGTVzJgxQy0tLaqoqDhnvytXrrQ+7nO5XBo2bFhPTAMAAAhBIROajDFatGiRvvSlLyklJUWSVFdXJ0lKTEz0q01MTLSO1dXVKTIyUnFxceetSUhI6HLOhIQEv5rO54mLi1NkZKRV09nSpUvl8/ms7ciRI929bAAA0E+EB7uBs+677z799a9/VXl5eZdjDofDb98Y02Wss84156oPpObTnE6nnE7nefsAAAAXh5BYaVqwYIF+//vf6+WXX9bQoUOtcbfbLUldVnrq6+utVSG3263W1lZ5vd7z1hw7dqzLeY8fP+5X0/k8Xq9XbW1tXVagAADAP56ghiZjjO677z49//zzeumllzRixAi/4yNGjJDb7VZpaak11traqrKyMk2cOFGSlJqaqoiICL+a2tpaVVVVWTXp6eny+Xzas2ePVbN79275fD6/mqqqKtXW1lo1JSUlcjqdSk1N7fmLBwAA/UpQP5777ne/q6efflr//d//rZiYGGulx+VyKSoqSg6HQ3l5eVqxYoWSk5OVnJysFStWaODAgcrJybFq586dq/z8fA0ePFiDBg1SQUGBxo4dq2nTpkmSRo8erZkzZ2revHnasGGDJOnuu+9WVlaWRo0aJUnKyMjQmDFj5PF4tGbNGp04cUIFBQWaN2+eYmNjgzA7AAAglAQ1NK1bt06SNGXKFL/xjRs36s4775QkLV68WKdOndL8+fPl9XqVlpamkpISxcTEWPWPPPKIwsPDdeutt+rUqVOaOnWqNm3apLCwMKtm8+bNys3Ntb5ll52drbVr11rHw8LCtG3bNs2fP1+TJk1SVFSUcnJy9NBDD/XS1QMAgP7EYYwxwW7iYtHY2CiXyyWfz8fqFAAAF+jyJdv89t9fldkr57F7/w6JB8EBAABCHaEJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADAhqCGpj//+c+aNWuWkpKS5HA49Lvf/c7vuDFGy5YtU1JSkqKiojRlyhQdOHDAr6alpUULFixQfHy8oqOjlZ2drZqaGr8ar9crj8cjl8sll8slj8ejhoYGv5rDhw9r1qxZio6OVnx8vHJzc9Xa2toblw0AAPqhoIam5uZmXXPNNVq7du05j69evVqFhYVau3at9u7dK7fbrenTp6upqcmqycvL09atW1VUVKTy8nKdPHlSWVlZam9vt2pycnJUWVmp4uJiFRcXq7KyUh6Pxzre3t6uzMxMNTc3q7y8XEVFRdqyZYvy8/N77+IBAED/YkKEJLN161Zrv6Ojw7jdbrNq1Spr7PTp08blcpn169cbY4xpaGgwERERpqioyKo5evSoGTBggCkuLjbGGFNdXW0kmV27dlk1O3fuNJLMm2++aYwxZvv27WbAgAHm6NGjVs0zzzxjnE6n8fl8tq/B5/MZSd16DQAAOLfh9/+v39Zb7N6/Q/aZpoMHD6qurk4ZGRnWmNPp1OTJk7Vjxw5JUkVFhdra2vxqkpKSlJKSYtXs3LlTLpdLaWlpVs2ECRPkcrn8alJSUpSUlGTVzJgxQy0tLaqoqPi7Pba0tKixsdFvAwAAF6eQDU11dXWSpMTERL/xxMRE61hdXZ0iIyMVFxd33pqEhIQu75+QkOBX0/k8cXFxioyMtGrOZeXKldZzUi6XS8OGDevmVQIAgP4iZEPTWQ6Hw2/fGNNlrLPONeeqD6Sms6VLl8rn81nbkSNHztsXAADov0I2NLndbknqstJTX19vrQq53W61trbK6/Wet+bYsWNd3v/48eN+NZ3P4/V61dbW1mUF6tOcTqdiY2P9NgAAcHEK2dA0YsQIud1ulZaWWmOtra0qKyvTxIkTJUmpqamKiIjwq6mtrVVVVZVVk56eLp/Ppz179lg1u3fvls/n86upqqpSbW2tVVNSUiKn06nU1NRevU4AANA/hAfz5CdPntT//d//WfsHDx5UZWWlBg0apMsuu0x5eXlasWKFkpOTlZycrBUrVmjgwIHKycmRJLlcLs2dO1f5+fkaPHiwBg0apIKCAo0dO1bTpk2TJI0ePVozZ87UvHnztGHDBknS3XffraysLI0aNUqSlJGRoTFjxsjj8WjNmjU6ceKECgoKNG/ePFaPAACApCCHpn379umGG26w9hctWiRJmjNnjjZt2qTFixfr1KlTmj9/vrxer9LS0lRSUqKYmBjrNY888ojCw8N166236tSpU5o6dao2bdqksLAwq2bz5s3Kzc21vmWXnZ3t99tQYWFh2rZtm+bPn69JkyYpKipKOTk5euihh3p7CgAAQD/hMMaYYDdxsWhsbJTL5ZLP52OFCgCAC3T5km1++++vyuyV89i9f4fsM00AAAChhNAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6Gpk8cee0wjRozQJZdcotTUVL3yyivBbgkAAISA8GA3EEqeffZZ5eXl6bHHHtOkSZO0YcMGffWrX1V1dbUuu+yyYLcHAMBF6/Il24LdwmdyGGNMsJsIFWlpabruuuu0bt06a2z06NG6+eabtXLlys98fWNjo1wul3w+n2JjY3u0t3P9Y3p/VWaPngMAgL4SSEjqrfue3fs3K01/09raqoqKCi1ZssRvPCMjQzt27Djna1paWtTS0mLt+3w+SZ9Mfk/raPm4y1hvnAcAgO5IeeAPXcaqls/4zJpA9NZ97+z7ftY6EqHpbz788EO1t7crMTHRbzwxMVF1dXXnfM3KlSu1fPnyLuPDhg3rlR47c/2kT04DAEC39Nb9qbfve01NTXK5XH/3OKGpE4fD4bdvjOkydtbSpUu1aNEia7+jo0MnTpzQ4MGD/+5rAtHY2Khhw4bpyJEjPf6xH/4/5rnvMNd9g3nuG8xz3+jNeTbGqKmpSUlJSeetIzT9TXx8vMLCwrqsKtXX13dZfTrL6XTK6XT6jX3uc5/rrRYVGxvL/yD7APPcd5jrvsE89w3muW/01jyfb4XpLH5y4G8iIyOVmpqq0tJSv/HS0lJNnDgxSF0BAIBQwUrTpyxatEgej0fjx49Xenq6fv7zn+vw4cO69957g90aAAAIMkLTp9x222366KOP9B//8R+qra1VSkqKtm/fruHDhwe1L6fTqQceeKDLR4HoWcxz32Gu+wbz3DeY574RCvPM7zQBAADYwDNNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhKUQ89thjGjFihC655BKlpqbqlVdeOW99WVmZUlNTdckll+iKK67Q+vXr+6jT/q078/z8889r+vTpuvTSSxUbG6v09HT94Q8980cnL3bd/fd81quvvqrw8HB98Ytf7N0GLyLdneuWlhb94Ac/0PDhw+V0OvWFL3xBv/zlL/uo2/6ru/O8efNmXXPNNRo4cKCGDBmiu+66Sx999FEfdds//fnPf9asWbOUlJQkh8Oh3/3ud5/5mj6/FxoEXVFRkYmIiDCPP/64qa6uNgsXLjTR0dHm0KFD56x/7733zMCBA83ChQtNdXW1efzxx01ERIR57rnn+rjz/qW787xw4ULz4x//2OzZs8e8/fbbZunSpSYiIsK89tprfdx5/9LdeT6roaHBXHHFFSYjI8Ncc801fdNsPxfIXGdnZ5u0tDRTWlpqDh48aHbv3m1effXVPuy6/+nuPL/yyitmwIAB5qc//al57733zCuvvGKuvvpqc/PNN/dx5/3L9u3bzQ9+8AOzZcsWI8ls3br1vPXBuBcSmkLA9ddfb+69916/sauuusosWbLknPWLFy82V111ld/YPffcYyZMmNBrPV4MujvP5zJmzBizfPnynm7tohLoPN92223m3/7t38wDDzxAaLKpu3P9wgsvGJfLZT766KO+aO+i0d15XrNmjbniiiv8xh599FEzdOjQXuvxYmMnNAXjXsjHc0HW2tqqiooKZWRk+I1nZGRox44d53zNzp07u9TPmDFD+/btU1tbW6/12p8FMs+ddXR0qKmpSYMGDeqNFi8Kgc7zxo0b9e677+qBBx7o7RYvGoHM9e9//3uNHz9eq1ev1uc//3mNHDlSBQUFOnXqVF+03C8FMs8TJ05UTU2Ntm/fLmOMjh07pueee06ZmZl90fI/jGDcC/kzKkH24Ycfqr29XYmJiX7jiYmJqqurO+dr6urqzll/5swZffjhhxoyZEiv9dtfBTLPnT388MNqbm7Wrbfe2hstXhQCmed33nlHS5Ys0SuvvKLwcP4vya5A5vq9995TeXm5LrnkEm3dulUffvih5s+frxMnTvBc098RyDxPnDhRmzdv1m233abTp0/rzJkzys7O1n/913/1Rcv/MIJxL2SlKUQ4HA6/fWNMl7HPqj/XOPx1d57PeuaZZ7Rs2TI9++yzSkhI6K32Lhp257m9vV05OTlavny5Ro4c2VftXVS682+6o6NDDodDmzdv1vXXX6+bbrpJhYWF2rRpE6tNn6E781xdXa3c3Fz98Ic/VEVFhYqLi3Xw4EH++Hsv6Ot7If9ZF2Tx8fEKCwvr8l8s9fX1XRL0WW63+5z14eHhGjx4cK/12p8FMs9nPfvss5o7d65++9vfatq0ab3ZZr/X3XluamrSvn37tH//ft13332SPrmxG2MUHh6ukpIS3XjjjX3Se38TyL/pIUOG6POf/7xcLpc1Nnr0aBljVFNTo+Tk5F7tuT8KZJ5XrlypSZMm6V//9V8lSePGjVN0dLS+/OUv68EHH+TTgB4SjHshK01BFhkZqdTUVJWWlvqNl5aWauLEied8TXp6epf6kpISjR8/XhEREb3Wa38WyDxLn6ww3XnnnXr66ad5HsGG7s5zbGysXn/9dVVWVlrbvffeq1GjRqmyslJpaWl91Xq/E8i/6UmTJumDDz7QyZMnrbG3335bAwYM0NChQ3u13/4qkHn++OOPNWCA/+01LCxM0v9fCcGFC8q9sNceMYdtZ7/O+sQTT5jq6mqTl5dnoqOjzfvvv2+MMWbJkiXG4/FY9We/Zvm9733PVFdXmyeeeIKfHLChu/P89NNPm/DwcPOzn/3M1NbWWltDQ0OwLqFf6O48d8a35+zr7lw3NTWZoUOHmm984xvmwIEDpqyszCQnJ5vvfOc7wbqEfqG787xx40YTHh5uHnvsMfPuu++a8vJyM378eHP99dcH6xL6haamJrN//36zf/9+I8kUFhaa/fv3Wz/tEAr3QkJTiPjZz35mhg8fbiIjI811111nysrKrGNz5swxkydP9qv/05/+ZK699loTGRlpLr/8crNu3bo+7rh/6s48T5482Ujqss2ZM6fvG+9nuvvv+dMITd3T3bl+4403zLRp00xUVJQZOnSoWbRokfn444/7uOv+p7vz/Oijj5oxY8aYqKgoM2TIEHP77bebmpqaPu66f3n55ZfP+/+5oXAvdBjDWiEAAMBn4ZkmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGz4f1fSkcDP03YFAAAAAElFTkSuQmCC",
+ "text/plain": [
+ "