diff --git a/analysis/Python_scripts/scatter_plot.png b/analysis/Python_scripts/scatter_plot.png new file mode 100644 index 0000000..e1808d9 Binary files /dev/null and b/analysis/Python_scripts/scatter_plot.png differ diff --git a/analysis/Python_scripts/scatterplot.ipynb b/analysis/Python_scripts/scatterplot.ipynb index a9f0f01..f2cbe54 100644 --- a/analysis/Python_scripts/scatterplot.ipynb +++ b/analysis/Python_scripts/scatterplot.ipynb @@ -2,29 +2,25 @@ "cells": [ { "cell_type": "code", - "execution_count": 25, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import plotly.graph_objs as go\n", "import pandas as pd\n", - "import seaborn as sns\n", - "from matplotlib import pyplot as plt\n", "from rdkit import Chem\n", - "import os\n", - "import plotly.graph_objs as go\n", - "from plotly.subplots import make_subplots\n", "from matchms.importing import load_from_msp\n", "from matchms.logging_functions import set_matchms_logger_level\n", + "\n", "from utils import is_spectrum_for_compound\n", "\n", - "set_matchms_logger_level('ERROR')\n" + "set_matchms_logger_level('ERROR')" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -33,23 +29,46 @@ "matchms_scores = matchms_scores[matchms_scores.apply(lambda x: is_spectrum_for_compound(x['query'], x['reference']), axis=1)]\n", "\n", "molecules = Chem.SDMolSupplier(\"../../data/RECETOX_GC-EI-MS_20201028.sdf\")\n", + "\n", "predicted_spectra = list(load_from_msp(\"../data/filtered/simulated_matchms_filter_1%I_all_peaks.msp\"))\n", - "spectra_metadata= pd.DataFrame.from_dict([x.metadata for x in predicted_spectra])\n", - "spectra_metadata.rename(columns={'compound_name': 'query'}, inplace=True)" + "spectra_metadata = pd.DataFrame.from_dict([x.metadata for x in predicted_spectra])\n", + "spectra_metadata.rename(columns={'compound_name': 'query'}, inplace=True)\n", + "\n", + "reference_spectra = list(load_from_msp(\"../data/experimental/RECETOX_GC-EI_MS_20201028.msp\"))\n", + "reference_spectra_metadata = pd.DataFrame.from_dict([x.metadata for x in reference_spectra])\n", + "reference_spectra_metadata.rename(columns={'compound_name': 'reference'}, inplace=True)\n" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "merged = matchms_scores.merge(spectra_metadata, on=\"query\", how=\"inner\")" + "merged = matchms_scores.merge(spectra_metadata, on=\"query\", how=\"inner\")\n", + "merged.rename(columns={'num_peaks': 'n_peaks_query'}, inplace=True)\n", + "\n", + "merged = merged.merge(reference_spectra_metadata, on=\"reference\", how=\"inner\")\n", + "merged.rename(columns={'num_peaks': 'n_peaks_reference'}, inplace=True)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "merged['CosineHungarian_0.01_0.0_1.0_matches'] = pd.to_numeric(merged['CosineHungarian_0.01_0.0_1.0_matches'], errors='coerce')\n", + "merged['n_peaks_query'] = pd.to_numeric(merged['n_peaks_query'], errors='coerce')\n", + "merged['n_peaks_reference'] = pd.to_numeric(merged['n_peaks_reference'], errors='coerce')\n", + "\n", + "merged['FractionQuery'] = merged['CosineHungarian_0.01_0.0_1.0_matches'] / merged['n_peaks_query']\n", + "merged['FractionReference'] = merged['CosineHungarian_0.01_0.0_1.0_matches'] / merged['n_peaks_reference']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -61,12 +80,727 @@ "data": [ { "marker": { - "color": "blue", + "color": [ + 0.7222222222222222, + 0.68, + 0.7906976744186046, + 0.47619047619047616, + 0.1323529411764706, + 0.4230769230769231, + 0.3877551020408163, + 0.47619047619047616, + 0.25274725274725274, + 0.56, + 0.4444444444444444, + 0.12903225806451613, + 0.5084745762711864, + 0.625, + 0.75, + 0.47058823529411764, + 0.5045045045045045, + 0.6481481481481481, + 0.3544973544973545, + 0.7674418604651163, + 0.5507246376811594, + 0.7096774193548387, + 0.5373134328358209, + 0.6875, + 0.6363636363636364, + 0.6792452830188679, + 0.5777777777777777, + 0.6515151515151515, + 0.75, + 0.5925925925925926, + 0.6551724137931034, + 0.6170212765957447, + 0.6271186440677966, + 0.6147540983606558, + 0.7142857142857143, + 0.7213114754098361, + 0.7254901960784313, + 0.651685393258427, + 0.7241379310344828, + 0.711864406779661, + 0.6847826086956522, + 0.6153846153846154, + 0.6597938144329897, + 0.734375, + 0.6036036036036037, + 0.4909090909090909, + 0.4146341463414634, + 0.8636363636363636, + 0.9047619047619048, + 0.34782608695652173, + 0.4032258064516129, + 0.43636363636363634, + 0.43478260869565216, + 0.265625, + 0.4657534246575342, + 0.39344262295081966, + 0.5076923076923077, + 0.32926829268292684, + 0.1388888888888889, + 0.03225806451612903, + 0.5853658536585366, + 0.6923076923076923, + 0.43636363636363634, + 0.42857142857142855, + 0.48717948717948717, + 0.49019607843137253, + 0.3375, + 0.29473684210526313, + 0.3472222222222222, + 0.18439716312056736, + 0.36363636363636365, + 0.36363636363636365, + 0.16853932584269662, + 0.1346153846153846, + 0.37777777777777777, + 0.4818181818181818, + 0.6105263157894737, + 0.5178571428571429, + 0.7361111111111112, + 0.10119047619047619, + 0.3786407766990291, + 0.07692307692307693, + 0.24561403508771928, + 0.6832298136645962, + 0.48148148148148145, + 0.5476190476190477, + 0.6428571428571429, + 0.2247191011235955, + 0.54, + 0.3111111111111111, + 0.0425531914893617, + 0.7105263157894737, + 0.65, + 0.35555555555555557, + 0.3023255813953488, + 0.44, + 0.5909090909090909, + 0.7241379310344828, + 0.47058823529411764, + 0.7241379310344828, + 0.5, + 0.5555555555555556, + 0.75, + 0.8571428571428571, + 0.6666666666666666, + 0.7083333333333334, + 0.875, + 0.7741935483870968, + 0.6428571428571429, + 0.6923076923076923, + 0.7857142857142857, + 0.825, + 0.7608695652173914, + 0.7647058823529411, + 0.7966101694915254, + 0.5542168674698795, + 0.40384615384615385, + 0.6, + 0.47150259067357514, + 0.5149700598802395, + 0.4188034188034188, + 0.6785714285714286, + 0.6518987341772152, + 0.33691756272401435, + 0.32388663967611336, + 0.5522388059701493, + 0.7560975609756098, + 0.6, + 0.5441176470588235, + 0.4166666666666667, + 0.07692307692307693, + 0.2903225806451613, + 0.32, + 0.27450980392156865, + 0.21052631578947367, + 0.19753086419753085, + 0.38461538461538464, + 0.10256410256410256, + 0.14285714285714285, + 0.08823529411764706, + 0.3333333333333333, + 0.20512820512820512, + 0.5454545454545454, + 0.16058394160583941, + 0.5510204081632653, + 0.8125, + 0.8461538461538461, + 0.5555555555555556, + 0.45454545454545453, + 0.76, + 0.5238095238095238, + 0.6875, + 0.6764705882352942, + 0.5, + 0.4507042253521127, + 0.4222222222222222, + 0.022727272727272728, + 0.6, + 0.2608695652173913, + 0.09345794392523364, + 0.3037974683544304, + 0.22666666666666666, + 0.14545454545454545, + 0.36, + 0.6341463414634146, + 0.189873417721519, + 0.36046511627906974, + 0.4666666666666667, + 0.08108108108108109, + 0.19696969696969696, + 0.3118279569892473, + 0.7058823529411765, + 0.7209302325581395, + 0.6551724137931034, + 0.68, + 0.654320987654321, + 0.6349206349206349, + 0.546875, + 0.5461538461538461, + 0.6074074074074074, + 0.6436781609195402, + 0.6176470588235294, + 0.4861878453038674, + 0.5535714285714286, + 0.45989304812834225, + 0.40625, + 0.35365853658536583, + 0.5865384615384616, + 0.21052631578947367, + 0.41025641025641024, + 0.3793103448275862, + 0.2708333333333333, + 0.022900763358778626, + 0.8333333333333334, + 0.29411764705882354, + 0.25925925925925924, + 0.03529411764705882, + 0.30303030303030304, + 0.5918367346938775, + 0.2777777777777778, + 0.14516129032258066, + 0.02727272727272727, + 0.5333333333333333, + 0.39285714285714285, + 0.3076923076923077, + 0.4, + 0.2631578947368421, + 0.375, + 0.38235294117647056, + 0.5, + 0.2857142857142857, + 0.9375, + 0.48148148148148145, + 0.5714285714285714, + 0.2463768115942029, + 0.45614035087719296, + 0.4107142857142857, + 0.6, + 0.2857142857142857, + 0.38461538461538464, + 0.5606060606060606, + 0.35294117647058826, + 0.23076923076923078, + 0.18181818181818182, + 0.45454545454545453, + 0.4666666666666667, + 0.6071428571428571, + 0.45, + 0.3783783783783784, + 0.3262411347517731, + 0.3384615384615385, + 0.4067796610169492, + 0.48484848484848486, + 0.08571428571428572, + 0.6304347826086957, + 0.9166666666666666, + 0.5625, + 0.3373493975903614, + 0.3829787234042553, + 0.5833333333333334, + 0.4878048780487805, + 0.6206896551724138, + 0.5833333333333334, + 0.6428571428571429, + 0.21649484536082475, + 0.5, + 0.20754716981132076, + 0.6065573770491803, + 0.4594594594594595, + 0.23404255319148937, + 0.5, + 0.9230769230769231, + 0.5238095238095238, + 0.5813953488372093, + 0.5490196078431373, + 0.574468085106383, + 0.7777777777777778, + 0.5, + 0.8947368421052632, + 0.625, + 0.47058823529411764, + 0.6904761904761905, + 0.5441176470588235, + 0.6666666666666666, + 0.2857142857142857, + 0.17333333333333334, + 0.2631578947368421, + 0.45714285714285713, + 0.05, + 0.24390243902439024, + 0.65, + 0.3333333333333333, + 0.375, + 0.3898305084745763, + 0.3333333333333333, + 0.4411764705882353, + 0.26785714285714285, + 0.2727272727272727, + 0.2545454545454545, + 0.5681818181818182, + 0.6578947368421053, + 0.1076923076923077, + 0.048507462686567165, + 0.6923076923076923, + 0.39473684210526316, + 0.32, + 0.2, + 0.2653061224489796, + 0.7857142857142857, + 0.4642857142857143, + 0.8421052631578947, + 0.7560975609756098, + 0.5897435897435898, + 0.6388888888888888, + 0.4375, + 0.5319148936170213, + 0.39344262295081966, + 0.20689655172413793, + 0.3488372093023256, + 0.4027777777777778, + 0.4634146341463415, + 0.5555555555555556, + 0.37777777777777777, + 0.4074074074074074, + 0.47435897435897434, + 0.5098039215686274, + 0.5, + 0.6875, + 0.2891566265060241, + 0.5909090909090909, + 0.42105263157894735, + 0.5531914893617021, + 0.4074074074074074, + 0.10344827586206896, + 0.3492063492063492, + 0.265625, + 0.4838709677419355, + 0.35507246376811596, + 0.6, + 0.8571428571428571, + 0.026785714285714284, + 0.3, + 0.08227848101265822, + 0.5247524752475248, + 0.41379310344827586, + 0.9230769230769231, + 0.5568181818181818, + 0.5373134328358209, + 0.39285714285714285, + 0.4461538461538462, + 0, + 0, + 0 + ], + "colorbar": { + "title": { + "text": "Reference Matched %" + } + }, + "colorscale": [ + [ + 0, + "#440154" + ], + [ + 0.1111111111111111, + "#482878" + ], + [ + 0.2222222222222222, + "#3e4989" + ], + [ + 0.3333333333333333, + "#31688e" + ], + [ + 0.4444444444444444, + "#26828e" + ], + [ + 0.5555555555555556, + "#1f9e89" + ], + [ + 0.6666666666666666, + "#35b779" + ], + [ + 0.7777777777777778, + "#6ece58" + ], + [ + 0.8888888888888888, + "#b5de2b" + ], + [ + 1, + "#fde725" + ] + ], "opacity": 0.5, - "size": 5 + "size": [ + 7.027027027027027, + 5.483870967741935, + 7.23404255319149, + 2.9411764705882355, + 2.3684210526315788, + 8.8, + 7.755102040816326, + 4.545454545454545, + 3.8016528925619837, + 7.567567567567568, + 4.615384615384616, + 5.853658536585366, + 10.434782608695652, + 2.28310502283105, + 1.935483870967742, + 3.116883116883117, + 6.956521739130435, + 6.481481481481481, + 8.427672955974844, + 7.586206896551724, + 6.72566371681416, + 5.7894736842105265, + 8.089887640449438, + 6.24113475177305, + 6.511627906976745, + 5, + 2.988505747126437, + 6.615384615384615, + 6.315789473684211, + 8.571428571428571, + 6.972477064220184, + 8.169014084507042, + 8.862275449101796, + 9.25925925925926, + 7.518796992481203, + 7.394957983193278, + 7.789473684210526, + 8.22695035460993, + 5.793103448275863, + 6.829268292682928, + 9.130434782608695, + 8.64864864864865, + 8.767123287671232, + 6.7625899280575545, + 8.758169934640524, + 8.181818181818182, + 4.657534246575342, + 5.671641791044776, + 6.129032258064516, + 7.5, + 6.8493150684931505, + 10, + 8.450704225352112, + 8.292682926829269, + 7.391304347826086, + 5.217391304347826, + 6.226415094339623, + 6, + 4.040404040404041, + 4.615384615384616, + 4.660194174757281, + 3.302752293577982, + 6.956521739130435, + 4.477611940298508, + 5.588235294117647, + 8.620689655172413, + 3.552631578947368, + 3.6842105263157894, + 3.7593984962406015, + 3.9097744360902253, + 3.2, + 4.028776978417266, + 2.142857142857143, + 4.912280701754385, + 5.76271186440678, + 8.760330578512397, + 12.083333333333332, + 11.0828025477707, + 8.833333333333332, + 15.454545454545453, + 6.782608695652174, + 3.8461538461538463, + 10.37037037037037, + 2.6699029126213594, + 5.492957746478874, + 6.666666666666666, + 7.397260273972602, + 3.2608695652173916, + 7.826086956521739, + 12.173913043478262, + 8, + 7.826086956521739, + 7.123287671232877, + 8, + 5.531914893617022, + 9.565217391304348, + 9.12280701754386, + 10.769230769230768, + 8.421052631578947, + 10.769230769230768, + 12.5, + 11.764705882352942, + 5.217391304347826, + 5.806451612903226, + 9.777777777777777, + 8.947368421052632, + 5.6000000000000005, + 4.948453608247423, + 8.372093023255815, + 10.112359550561798, + 5.7894736842105265, + 5.238095238095238, + 5.785123966942148, + 1.870503597122302, + 6.962962962962963, + 7.1875, + 3.727810650887574, + 7.751196172248804, + 7.309236947791165, + 5.391849529780565, + 7.313432835820896, + 6.810035842293907, + 7.803030303030303, + 8.468468468468469, + 7.8817733990147785, + 5.522388059701493, + 5, + 4.675324675324675, + 5.362318840579711, + 4.054054054054054, + 2.4242424242424243, + 8, + 8.88888888888889, + 11.200000000000001, + 1.5384615384615385, + 8.421052631578947, + 7.8431372549019605, + 3.2, + 5.217391304347826, + 2.727272727272727, + 2.1052631578947367, + 11.428571428571427, + 6.233766233766234, + 5, + 13.5, + 7.222222222222222, + 9.166666666666666, + 8.695652173913043, + 9.75609756097561, + 9.047619047619047, + 10.731707317073171, + 9.565217391304348, + 10.454545454545453, + 6.086956521739131, + 9.846153846153847, + 9.743589743589743, + 8, + 6.101694915254238, + 10.285714285714285, + 8.695652173913043, + 5.647058823529411, + 9.444444444444445, + 10, + 7.826086956521739, + 5.7142857142857135, + 10, + 6.813186813186814, + 3.333333333333333, + 6.666666666666666, + 7.222222222222222, + 7.785234899328859, + 10.434782608695652, + 5.344827586206896, + 7.835051546391752, + 6.601941747572816, + 9.298245614035087, + 8.24742268041237, + 9.333333333333334, + 9.102564102564102, + 9.479768786127167, + 8.484848484848484, + 5.217391304347826, + 11.503267973856207, + 7.79874213836478, + 6.615384615384615, + 6.070038910505836, + 5.742574257425742, + 8.714285714285715, + 11.428571428571427, + 6.808510638297872, + 4.888888888888888, + 8.125, + 1.875, + 6.25, + 3.8461538461538463, + 4.242424242424242, + 2.4, + 6.25, + 2.0567375886524824, + 3.6363636363636367, + 7.5, + 3.333333333333333, + 4.324324324324325, + 6.666666666666666, + 5.517241379310345, + 3.6363636363636367, + 3.125, + 10.434782608695652, + 11.304347826086955, + 15.555555555555555, + 2.5, + 1.694915254237288, + 7.878787878787879, + 4.848484848484849, + 7.727272727272727, + 8.666666666666668, + 9.787234042553191, + 7.5, + 2.2857142857142856, + 4.3478260869565215, + 7.872340425531915, + 5.454545454545454, + 2.926829268292683, + 10, + 3.225806451612903, + 4.242424242424242, + 10, + 6, + 4.666666666666667, + 7.022900763358778, + 11.578947368421053, + 4.528301886792453, + 7.619047619047619, + 4.285714285714286, + 7.733333333333333, + 4.583333333333333, + 6.206896551724138, + 4.95575221238938, + 3.5294117647058827, + 3.783783783783784, + 9.09090909090909, + 4.235294117647059, + 2.545454545454545, + 5.142857142857142, + 12.352941176470589, + 5.511811023622047, + 10, + 7.047619047619048, + 3.4343434343434343, + 13.75, + 5, + 5.454545454545454, + 3.492063492063492, + 5.882352941176471, + 5.894736842105263, + 4.576271186440678, + 4.827586206896552, + 9.26829268292683, + 3.9080459770114944, + 1.5873015873015872, + 2.5396825396825395, + 7.25, + 6.7272727272727275, + 5, + 11.200000000000001, + 2.113821138211382, + 7.5, + 7.272727272727273, + 15, + 6.779661016949152, + 6.842105263157895, + 12.142857142857142, + 4.5, + 6.47887323943662, + 12.857142857142858, + 7.6923076923076925, + 6.382978723404255, + 4.090909090909091, + 15.555555555555555, + 4.854368932038835, + 4.854368932038835, + 9.333333333333334, + 5.777777777777777, + 8.571428571428571, + 5.172413793103448, + 12.307692307692308, + 10.69767441860465, + 7.428571428571429, + 3.384615384615385, + 4, + 2.490272373540856, + 2.412451361867704, + 4.6000000000000005, + 4.6000000000000005, + 5.957446808510638, + 2.6178010471204187, + 2.513089005235602, + 6.857142857142858, + 5.172413793103448, + 8, + 9.26829268292683, + 2.1739130434782608, + 2.3448275862068964, + 6.470588235294118, + 4.933333333333334, + 7.878787878787879, + 7.027027027027027, + 4, + 3.66412213740458, + 3.969465648854962, + 7.058823529411765, + 3.7681159420289854, + 6.76923076923077, + 3.6923076923076925, + 6.76923076923077, + 6.296296296296297, + 5.3097345132743365, + 8.305084745762713, + 1.2, + 2.4, + 8.571428571428571, + 11.48936170212766, + 9.62962962962963, + 9.13793103448276, + 4.067796610169491, + 4.8979591836734695, + 10.425531914893618, + 6.857142857142858, + 10.909090909090908, + 9.43089430894309, + 0, + 0, + 0 + ] }, "mode": "markers", - "name": "Matched ions", + "name": "Matched Ions", "type": "scatter", "x": [ 0.6686826253068318, @@ -136,8 +870,8 @@ 0.705266210128664, 0.5930865437498251, 0.2024614643737578, - 0.2922945771793652, 0.3344167026061641, + 0.2922945771793652, 0.2929942661169666, 0.2196765219565474, 0.2882645668024602, @@ -277,6 +1011,7 @@ 0.2020410992916848, 0.0833367618973456, 0.3524206220081063, + 0.3451908063562069, 0.2962198971438111, 0.0131292856774449, 0.2728924526571127, @@ -298,7 +1033,6 @@ 0.0947802941946988, 0.0782110915891292, 0.3287650685440457, - 0.3451908063562069, 0.8153530848051393, 0.0836388745766977, 0.1985394137797036, @@ -398,7 +1132,10 @@ 0.828844862867961, 0.8323772911117084, 0.3562542456636353, - 0.3584753524266403 + 0.3584753524266403, + 0, + 0, + 0 ], "y": [ 13, @@ -468,8 +1205,8 @@ 19, 25, 27, - 25, 28, + 25, 26, 24, 28, @@ -609,6 +1346,7 @@ 8, 5, 12, + 13, 7, 2, 15, @@ -630,7 +1368,6 @@ 9, 14, 46, - 13, 22, 24, 16, @@ -730,7 +1467,10 @@ 49, 36, 66, - 58 + 58, + 0, + 0, + 0 ] } ], @@ -1556,20 +2296,20 @@ }, "xaxis": { "title": { - "text": "score" + "text": "Score" } }, "yaxis": { "title": { - "text": "matches" + "text": "Matches" } } } }, "text/html": [ - "