######## snakemake preamble start (automatically inserted, do not edit) ########
-import sys; sys.path.extend(['/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example/..', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/bin', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/lib-dynload', '/home/jbloom/.local/lib/python3.11/site-packages', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/home/jbloom/.cache/snakemake/snakemake/source-cache/runtime-cache/tmpcrtmgh2v/file/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks']); import pickle; snakemake = 
pickle.loads(b'\x80\x04\x95\x9a\x14\x00\x00\x00\x00\x00\x00\x8c\x10snakemake.script\x94\x8c\tSnakemake\x94\x93\x94)\x81\x94}\x94(\x8c\x05input\x94\x8c\x0csnakemake.io\x94\x8c\nInputFiles\x94\x93\x94)\x81\x94(\x8c\'results/plates/plate11/curvefits.pickle\x94\x8c&results/plates/plate2/curvefits.pickle\x94e}\x94(\x8c\x06_names\x94}\x94\x8c\x07pickles\x94K\x00K\x02\x86\x94s\x8c\x12_allowed_overrides\x94]\x94(\x8c\x05index\x94\x8c\x04sort\x94eh\x13\x8c\tfunctools\x94\x8c\x07partial\x94\x93\x94h\x06\x8c\x19Namedlist._used_attribute\x94\x93\x94\x85\x94R\x94(h\x19)}\x94\x8c\x05_name\x94h\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh\x0fh\x06\x8c\tNamedlist\x94\x93\x94)\x81\x94(h\nh\x0be}\x94(h\r}\x94h\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bubub\x8c\x06output\x94h\x06\x8c\x0bOutputFiles\x94\x93\x94)\x81\x94(\x8c,results/sera/M099d0/titers_per_replicate.csv\x94\x8c\x1eresults/sera/M099d0/titers.csv\x94\x8c\x1eresults/sera/M099d0/curves.pdf\x94\x8c$results/sera/M099d0/curvefits.pickle\x94\x8c 
results/sera/M099d0/qc_drops.yml\x94e}\x94(h\r}\x94(\x8c\x0eper_rep_titers\x94K\x00N\x86\x94\x8c\x06titers\x94K\x01N\x86\x94\x8c\ncurves_pdf\x94K\x02N\x86\x94\x8c\x06pickle\x94K\x03N\x86\x94\x8c\x08qc_drops\x94K\x04N\x86\x94uh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh<h5h>h6h@h7hBh8hDh9ub\x8c\x06params\x94h\x06\x8c\x06Params\x94\x93\x94)\x81\x94(]\x94(\x8c\x14A/California/07/2009\x94\x8c\x12A/Michigan/45/2015\x94\x8c\x12A/Brisbane/02/2018\x94\x8c\x11A/Ghana/2080/2020\x94\x8c\x18A/Cote_DIvoire/1448/2021\x94\x8c\x10A/Ghana/138/2020\x94\x8c\x0fA/Togo/845/2020\x94\x8c\x10A/Togo/0274/2021\x94\x8c\x10A/Hawaii/70/2019\x94\x8c\x12A/Niger/10217/2021\x94\x8c\x19A/SouthAfrica/R16462/2021\x94\x8c\x10A/Togo/0304/2021\x94\x8c\x12A/Paris/31196/2021\x94\x8c\x14A/Belgium/H0017/2022\x94\x8c\x18A/England/220200318/2022\x94\x8c\x12A/Paris/30353/2021\x94\x8c\x10A/Nimes/871/2021\x94\x8c\x14A/Belgium/H0038/2022\x94\x8c\x14A/Wisconsin/588/2019\x94\x8c\x14A/Washington/23/2020\x94\x8c\x1aA/India/Pun-NIV312851/2021\x94\x8c\x1aA/India-PUN-NIV328484/2021\x94\x8c\x16A/Bangladesh/8036/2021\x94\x8c\x0eA/Perth/1/2022\x94\x8c\x19A/SouthAfrica/R14850/2021\x94\x8c\x13A/Norway/25089/2022\x94\x8c\x1cA/Bangladesh/3210810034/2021\x94\x8c\x13A/Chester/5355/2022\x94\x8c\x12A/Michigan/19/2021\x94\x8c\x0eA/Utah/27/2022\x94\x8c\x12A/Newcastle/2/2022\x94\x8c\x1fA/India-Pune-Nivcov2221170/2022\x94\x8c\x10A/Sydney/43/2022\x94\x8c\x12A/Brisbane/48/2022\x94\x8c\x16A/Bangladesh/8002/2021\x94\x8c\x16A/Bangladesh/2221/2021\x94e\x8c\x08midpoint\x94}\x94(\x8c\x0emin_replicates\x94K\x02\x8c\x1bmax_fold_change_from_median\x94K\x03\x8c\x11viruses_ignore_qc\x94]\x94ue}\x94(h\r}\x94(\x8c\x17viral_strain_plot_order\x94K\x00N\x86\x94\x8c\x0eserum_titer_as\x94K\x01N\x86\x94\x8c\rqc_thresholds\x94K\x02N\x86\x94uh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1d
h\x14sNt\x94bh\x80hSh\x82hxh\x84hyub\x8c\twildcards\x94h\x06\x8c\tWildcards\x94\x93\x94)\x81\x94\x8c\x06M099d0\x94a}\x94(h\r}\x94\x8c\x05serum\x94K\x00N\x86\x94sh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94b\x8c\x05serum\x94h\x93ub\x8c\x07threads\x94K\x01\x8c\tresources\x94h\x06\x8c\tResources\x94\x93\x94)\x81\x94(K\x01K\x01\x8c\x04/tmp\x94e}\x94(h\r}\x94(\x8c\x06_cores\x94K\x00N\x86\x94\x8c\x06_nodes\x94K\x01N\x86\x94\x8c\x06tmpdir\x94K\x02N\x86\x94uh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh\xaaK\x01h\xacK\x01h\xaeh\xa7ub\x8c\x03log\x94h\x06\x8c\x03Log\x94\x93\x94)\x81\x94\x8c\'results/sera/M099d0/M099d0_titers.ipynb\x94a}\x94(h\r}\x94\x8c\x08notebook\x94K\x00N\x86\x94sh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh\xc0h\xbdub\x8c\x06config\x94}\x94(\x8c\x10seqneut-pipeline\x94\x8c\x03../\x94\x8c\x04docs\x94\x8c\x07../docs\x94\x8c\x0bdescription\x94X\xba\x01\x00\x00# Test example for [seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nThis is a small toy-example created by subsetting a real experiment dataset.\n\nSee [https://github.com/jbloomlab/seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nfor the computer code and underlying numerical data.\n\nSee [here](https://github.com/jbloomlab/seqneut-pipeline/graphs/contributors) for a\nlist of all contributors to the pipeline.\n\x94\x8c\x0fviral_libraries\x94}\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94s\x8c\x17viral_strain_plot_order\x94\x8c 
data/viral_strain_plot_order.csv\x94\x8c\x12neut_standard_sets\x94}\x94\x8c\x08loes2023\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94s\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\x08upstream\x94\x8c\x1fCTCCCTACAATGTCGGATTTGTATTTAATAG\x94\x8c\ndownstream\x94\x8c\x00\x94\x8c\x04minq\x94K\x14\x8c\x11upstream_mismatch\x94K\x04\x8c\x0ebc_orientation\x94\x8c\x02R2\x94u\x8c#default_process_plate_qc_thresholds\x94}\x94(\x8c\x1bavg_barcode_counts_per_well\x94K\xfa\x8c\x1fmin_neut_standard_frac_per_well\x94G?tz\xe1G\xae\x14{\x8c"no_serum_per_viral_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?@bM\xd2\xf1\xa9\xfc\x8c\x0fmax_fold_change\x94K\x03\x8c\tmax_wells\x94K\x02u\x8c!per_neut_standard_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?tz\xe1G\xae\x14{\x8c\x0fmax_fold_change\x94K\x03\x8c\tmax_wells\x94K\x02u\x8c min_neut_standard_count_per_well\x94M\xf4\x01\x8c)min_no_serum_count_per_viral_barcode_well\x94K\x1e\x8c+max_frac_infectivity_per_viral_barcode_well\x94K\x05\x8c)min_dilutions_per_barcode_serum_replicate\x94K\x06u\x8c%default_process_plate_curvefit_params\x94}\x94(\x8c\x18frac_infectivity_ceiling\x94K\x01\x8c\x06fixtop\x94\x89\x8c\tfixbottom\x94K\x00u\x8c!default_process_plate_curvefit_qc\x94}\x94(\x8c\x1dmax_frac_infectivity_at_least\x94G?\xe0\x00\x00\x00\x00\x00\x00\x8c\x06min_R2\x94G?\xe9\x99\x99\x99\x99\x99\x9a\x8c#serum_replicates_ignore_curvefit_qc\x94]\x94\x8c+barcode_serum_replicates_ignore_curvefit_qc\x94]\x94u\x8c\x06plates\x94}\x94(\x8c\x06plate2\x94}\x94(\x8c\x04date\x94\x8c\x08datetime\x94\x8c\x04date\x94\x93\x94C\x04\x07\xe7\x08\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate2_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xe9K\xfah\xeaG?tz\xe1G\xae\x14{h\xeb}\x94(h\xedG?@bM\xd2\xf1\xa9\xfch\xeeK\x03h\xefK\x02uh\xf0}\x94(h\xf2G?tz\xe1G\xae\x14{h\xf3K\x03h\xf4K\x02uh\xf5M\xf4\x0
1h\xf6K\x1eh\xf7K\x05h\xf8K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xfbK\x01h\xfc\x89h\xfdK\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\x00\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\x01\x01\x00\x00G?\xe9\x99\x99\x99\x99\x99\x9aj\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00j\x05\x01\x00\x00uu\x8c\x07plate11\x94}\x94(\x8c\x04date\x94j\r\x01\x00\x00C\x04\x07\xe7\t\x1a\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate11_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94]\x94(\x8c\x10AGTCCTATCCTCAAAT\x94\x8c\x06M099d0\x94eas\x8c\rqc_thresholds\x94}\x94(h\xe9K\xfah\xeaG?tz\xe1G\xae\x14{h\xeb}\x94(h\xedG?@bM\xd2\xf1\xa9\xfch\xeeK\x03h\xefK\x02uh\xf0}\x94(h\xf2G?tz\xe1G\xae\x14{h\xf3K\x03h\xf4K\x02uh\xf5M\xf4\x01h\xf6K\x1eh\xf7K\x05h\xf8K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xfbK\x01h\xfc\x89h\xfdK\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\x00\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\x01\x01\x00\x00G?\xe9\x99\x99\x99\x99\x99\x9aj\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00]\x94]\x94(\x8c\x10AGGTCAAGACCACAGG\x94\x8c\x06M099d0\x94eauuu\x8c\x16default_serum_titer_as\x94hx\x8c\x1bdefault_serum_qc_thresholds\x94hy\x8c\x16sera_override_defaults\x94}\x94\x8c\x07M099d30\x94}\x94\x8c\rqc_thresholds\x94}\x94(hzK\x02h{K\x03h|]\x94\x8c\x14A/Belgium/H0017/2022\x94aussu\x8c\x04rule\x94\x8c\x0cserum_titers\x94\x8c\x0fbench_iteration\x94N\x8c\tscriptdir\x94\x8ct/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks\x94ub.'); from snakemake.logging import logger; logger.printshellcmds = False; import os; os.chdir(r'/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example');
+import sys; sys.path.extend(['/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example/..', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/bin', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/lib-dynload', '/home/jbloom/.local/lib/python3.11/site-packages', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/home/jbloom/.cache/snakemake/snakemake/source-cache/runtime-cache/tmpfy0wpwzq/file/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks']); import pickle; snakemake = 
pickle.loads(b'\x80\x04\x95\xdb\x14\x00\x00\x00\x00\x00\x00\x8c\x10snakemake.script\x94\x8c\tSnakemake\x94\x93\x94)\x81\x94}\x94(\x8c\x05input\x94\x8c\x0csnakemake.io\x94\x8c\nInputFiles\x94\x93\x94)\x81\x94(\x8c\'results/plates/plate11/curvefits.pickle\x94\x8c&results/plates/plate2/curvefits.pickle\x94e}\x94(\x8c\x06_names\x94}\x94\x8c\x07pickles\x94K\x00K\x02\x86\x94s\x8c\x12_allowed_overrides\x94]\x94(\x8c\x05index\x94\x8c\x04sort\x94eh\x13\x8c\tfunctools\x94\x8c\x07partial\x94\x93\x94h\x06\x8c\x19Namedlist._used_attribute\x94\x93\x94\x85\x94R\x94(h\x19)}\x94\x8c\x05_name\x94h\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh\x0fh\x06\x8c\tNamedlist\x94\x93\x94)\x81\x94(h\nh\x0be}\x94(h\r}\x94h\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bubub\x8c\x06output\x94h\x06\x8c\x0bOutputFiles\x94\x93\x94)\x81\x94(\x8c,results/sera/M099d0/titers_per_replicate.csv\x94\x8c\x1eresults/sera/M099d0/titers.csv\x94\x8c\x1eresults/sera/M099d0/curves.pdf\x94\x8c$results/sera/M099d0/curvefits.pickle\x94\x8c 
results/sera/M099d0/qc_drops.yml\x94e}\x94(h\r}\x94(\x8c\x0eper_rep_titers\x94K\x00N\x86\x94\x8c\x06titers\x94K\x01N\x86\x94\x8c\ncurves_pdf\x94K\x02N\x86\x94\x8c\x06pickle\x94K\x03N\x86\x94\x8c\x08qc_drops\x94K\x04N\x86\x94uh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh<h5h>h6h@h7hBh8hDh9ub\x8c\x06params\x94h\x06\x8c\x06Params\x94\x93\x94)\x81\x94(]\x94(\x8c\x14A/California/07/2009\x94\x8c\x12A/Michigan/45/2015\x94\x8c\x12A/Brisbane/02/2018\x94\x8c\x11A/Ghana/2080/2020\x94\x8c\x18A/Cote_DIvoire/1448/2021\x94\x8c\x10A/Ghana/138/2020\x94\x8c\x0fA/Togo/845/2020\x94\x8c\x10A/Togo/0274/2021\x94\x8c\x10A/Hawaii/70/2019\x94\x8c\x12A/Niger/10217/2021\x94\x8c\x19A/SouthAfrica/R16462/2021\x94\x8c\x10A/Togo/0304/2021\x94\x8c\x12A/Paris/31196/2021\x94\x8c\x14A/Belgium/H0017/2022\x94\x8c\x18A/England/220200318/2022\x94\x8c\x12A/Paris/30353/2021\x94\x8c\x10A/Nimes/871/2021\x94\x8c\x14A/Belgium/H0038/2022\x94\x8c\x14A/Wisconsin/588/2019\x94\x8c\x14A/Washington/23/2020\x94\x8c\x1aA/India/Pun-NIV312851/2021\x94\x8c\x1aA/India-PUN-NIV328484/2021\x94\x8c\x16A/Bangladesh/8036/2021\x94\x8c\x0eA/Perth/1/2022\x94\x8c\x19A/SouthAfrica/R14850/2021\x94\x8c\x13A/Norway/25089/2022\x94\x8c\x1cA/Bangladesh/3210810034/2021\x94\x8c\x13A/Chester/5355/2022\x94\x8c\x12A/Michigan/19/2021\x94\x8c\x0eA/Utah/27/2022\x94\x8c\x12A/Newcastle/2/2022\x94\x8c\x1fA/India-Pune-Nivcov2221170/2022\x94\x8c\x10A/Sydney/43/2022\x94\x8c\x12A/Brisbane/48/2022\x94\x8c\x16A/Bangladesh/8002/2021\x94\x8c\x16A/Bangladesh/2221/2021\x94e\x8c\x08midpoint\x94}\x94(\x8c\x0emin_replicates\x94K\x02\x8c\x1bmax_fold_change_from_median\x94K\x03\x8c\x11viruses_ignore_qc\x94]\x94ue}\x94(h\r}\x94(\x8c\x17viral_strain_plot_order\x94K\x00N\x86\x94\x8c\x0eserum_titer_as\x94K\x01N\x86\x94\x8c\rqc_thresholds\x94K\x02N\x86\x94uh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1d
h\x14sNt\x94bh\x80hSh\x82hxh\x84hyub\x8c\twildcards\x94h\x06\x8c\tWildcards\x94\x93\x94)\x81\x94\x8c\x06M099d0\x94a}\x94(h\r}\x94\x8c\x05serum\x94K\x00N\x86\x94sh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94b\x8c\x05serum\x94h\x93ub\x8c\x07threads\x94K\x01\x8c\tresources\x94h\x06\x8c\tResources\x94\x93\x94)\x81\x94(K\x01K\x01\x8c\x04/tmp\x94e}\x94(h\r}\x94(\x8c\x06_cores\x94K\x00N\x86\x94\x8c\x06_nodes\x94K\x01N\x86\x94\x8c\x06tmpdir\x94K\x02N\x86\x94uh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh\xaaK\x01h\xacK\x01h\xaeh\xa7ub\x8c\x03log\x94h\x06\x8c\x03Log\x94\x93\x94)\x81\x94\x8c\'results/sera/M099d0/M099d0_titers.ipynb\x94a}\x94(h\r}\x94\x8c\x08notebook\x94K\x00N\x86\x94sh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh\xc0h\xbdub\x8c\x06config\x94}\x94(\x8c\x10seqneut-pipeline\x94\x8c\x03../\x94\x8c\x04docs\x94\x8c\x07../docs\x94\x8c\x0bdescription\x94X\xba\x01\x00\x00# Test example for [seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nThis is a small toy-example created by subsetting a real experiment dataset.\n\nSee [https://github.com/jbloomlab/seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nfor the computer code and underlying numerical data.\n\nSee [here](https://github.com/jbloomlab/seqneut-pipeline/graphs/contributors) for a\nlist of all contributors to the pipeline.\n\x94\x8c\x0fviral_libraries\x94}\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94s\x8c\x17viral_strain_plot_order\x94\x8c 
data/viral_strain_plot_order.csv\x94\x8c\x12neut_standard_sets\x94}\x94\x8c\x08loes2023\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94s\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\x08upstream\x94\x8c\x1fCTCCCTACAATGTCGGATTTGTATTTAATAG\x94\x8c\ndownstream\x94\x8c\x00\x94\x8c\x04minq\x94K\x14\x8c\x11upstream_mismatch\x94K\x04\x8c\x0ebc_orientation\x94\x8c\x02R2\x94u\x8c#default_process_plate_qc_thresholds\x94}\x94(\x8c\x1bavg_barcode_counts_per_well\x94K\xfa\x8c\x1fmin_neut_standard_frac_per_well\x94G?tz\xe1G\xae\x14{\x8c"no_serum_per_viral_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?@bM\xd2\xf1\xa9\xfc\x8c\x0fmax_fold_change\x94K\x03\x8c\tmax_wells\x94K\x02u\x8c!per_neut_standard_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?tz\xe1G\xae\x14{\x8c\x0fmax_fold_change\x94K\x03\x8c\tmax_wells\x94K\x02u\x8c min_neut_standard_count_per_well\x94M\xf4\x01\x8c)min_no_serum_count_per_viral_barcode_well\x94K\x1e\x8c+max_frac_infectivity_per_viral_barcode_well\x94K\x05\x8c)min_dilutions_per_barcode_serum_replicate\x94K\x06u\x8c%default_process_plate_curvefit_params\x94}\x94(\x8c\x18frac_infectivity_ceiling\x94K\x01\x8c\x06fixtop\x94\x89\x8c\tfixbottom\x94K\x00u\x8c!default_process_plate_curvefit_qc\x94}\x94(\x8c\x1dmax_frac_infectivity_at_least\x94G?\xe0\x00\x00\x00\x00\x00\x00\x8c\x06min_R2\x94G?\xe9\x99\x99\x99\x99\x99\x9a\x8c#serum_replicates_ignore_curvefit_qc\x94]\x94\x8c+barcode_serum_replicates_ignore_curvefit_qc\x94]\x94u\x8c\x06plates\x94}\x94(\x8c\x06plate2\x94}\x94(\x8c\x04date\x94\x8c\x08datetime\x94\x8c\x04date\x94\x93\x94C\x04\x07\xe7\x08\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate2_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xe9K\xfah\xeaG?tz\xe1G\xae\x14{h\xeb}\x94(h\xedG?@bM\xd2\xf1\xa9\xfch\xeeK\x03h\xefK\x02uh\xf0}\x94(h\xf2G?tz\xe1G\xae\x14{h\xf3K\x03h\xf4K\x02uh\xf5M\xf4\x0
1h\xf6K\x1eh\xf7K\x05h\xf8K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xfbK\x01h\xfc\x89h\xfdK\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\x00\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\x01\x01\x00\x00G?\xe9\x99\x99\x99\x99\x99\x9aj\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00j\x05\x01\x00\x00uu\x8c\x07plate11\x94}\x94(\x8c\x04date\x94j\r\x01\x00\x00C\x04\x07\xe7\t\x1a\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate11_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94]\x94(\x8c\x10AGTCCTATCCTCAAAT\x94\x8c\x06M099d0\x94eas\x8c\rqc_thresholds\x94}\x94(h\xe9K\xfah\xeaG?tz\xe1G\xae\x14{h\xeb}\x94(h\xedG?@bM\xd2\xf1\xa9\xfch\xeeK\x03h\xefK\x02uh\xf0}\x94(h\xf2G?tz\xe1G\xae\x14{h\xf3K\x03h\xf4K\x02uh\xf5M\xf4\x01h\xf6K\x1eh\xf7K\x05h\xf8K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xfbK\x01h\xfc\x89h\xfdK\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\x00\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\x01\x01\x00\x00G?\xe9\x99\x99\x99\x99\x99\x9aj\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00]\x94]\x94(\x8c\x10AGGTCAAGACCACAGG\x94\x8c\x06M099d0\x94eauuu\x8c\x16default_serum_titer_as\x94hx\x8c\x1bdefault_serum_qc_thresholds\x94hy\x8c\x16sera_override_defaults\x94}\x94(\x8c\x07M099d30\x94}\x94\x8c\rqc_thresholds\x94}\x94(hzK\x02h{K\x03h|]\x94\x8c\x14A/Belgium/H0017/2022\x94aus\x8c\x07Y044d30\x94}\x94(\x8c\rqc_thresholds\x94}\x94(hzK\x02h{K\x04h|h}u\x8c\x08titer_as\x94\x8c\x04nt50\x94uuu\x8c\x04rule\x94\x8c\x0cserum_titers\x94\x8c\x0fbench_iteration\x94N\x8c\tscriptdir\x94\x8ct/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks\x94ub.'); from snakemake.logging import logger; logger.printshellcmds = False; import os; os.chdir(r'/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example');
######## snakemake preamble end #########
Correlate NT50s with midpoints
Out[7]:
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+In [1]:
+
+
+######## snakemake preamble start (automatically inserted, do not edit) ########
+import sys; sys.path.extend(['/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example/..', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/bin', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/lib-dynload', '/home/jbloom/.local/lib/python3.11/site-packages', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/home/jbloom/.cache/snakemake/snakemake/source-cache/runtime-cache/tmpmyp1r1xp/file/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks']); import pickle; snakemake = pickle.loads(b'\x80\x04\x95\xaf\x11\x00\x00\x00\x00\x00\x00\x8c\x10snakemake.script\x94\x8c\tSnakemake\x94\x93\x94)\x81\x94}\x94(\x8c\x05input\x94\x8c\x0csnakemake.io\x94\x8c\nInputFiles\x94\x93\x94)\x81\x94(\x8c"results/plates/plate2/qc_drops.yml\x94\x8c#results/plates/plate11/qc_drops.yml\x94\x8c 
results/sera/M099d0/qc_drops.yml\x94\x8c!results/sera/M099d30/qc_drops.yml\x94\x8c!results/sera/Y044d30/qc_drops.yml\x94\x8c"results/sera/Y154d182/qc_drops.yml\x94e}\x94(\x8c\x06_names\x94}\x94(\x8c\x0eplate_qc_drops\x94K\x00K\x02\x86\x94\x8c\rsera_qc_drops\x94K\x02K\x06\x86\x94u\x8c\x12_allowed_overrides\x94]\x94(\x8c\x05index\x94\x8c\x04sort\x94eh\x19\x8c\tfunctools\x94\x8c\x07partial\x94\x93\x94h\x06\x8c\x19Namedlist._used_attribute\x94\x93\x94\x85\x94R\x94(h\x1f)}\x94\x8c\x05_name\x94h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bh\x13h\x06\x8c\tNamedlist\x94\x93\x94)\x81\x94(h\nh\x0be}\x94(h\x11}\x94h\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bubh\x15h*)\x81\x94(h\x0ch\rh\x0eh\x0fe}\x94(h\x11}\x94h\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bubub\x8c\x06output\x94h\x06\x8c\x0bOutputFiles\x94\x93\x94)\x81\x94(\x8c#results/qc_drops/plate_qc_drops.yml\x94\x8c"results/qc_drops/sera_qc_drops.yml\x94e}\x94(h\x11}\x94(h\x13K\x00N\x86\x94h\x15K\x01N\x86\x94uh\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bh\x13hGh\x15hHub\x8c\x06params\x94h\x06\x8c\x06Params\x94\x93\x94)\x81\x94(]\x94(\x8c\x06plate2\x94\x8c\x07plate11\x94e]\x94(\x8c\x06M099d0\x94\x8c\x07M099d30\x94\x8c\x07Y044d30\x94\x8c\x08Y154d182\x94ee}\x94(h\x11}\x94(\x8c\x06plates\x94K\x00N\x86\x94\x8c\x04sera\x94K\x01N\x86\x94uh\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bhdhZhfh]ub\x8c\twildcards\x94h\x06\x8c\tWildcards\x94\x93\x94)\x81\x94}\x94(h\x11}\x94h\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bub\x8c\x07threads\x94K\x01\x8c\tresources\x94h\x06\x8c\tResources\x94\x9
3\x94)\x81\x94(K\x01K\x01\x8c\x04/tmp\x94e}\x94(h\x11}\x94(\x8c\x06_cores\x94K\x00N\x86\x94\x8c\x06_nodes\x94K\x01N\x86\x94\x8c\x06tmpdir\x94K\x02N\x86\x94uh\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bh\x88K\x01h\x8aK\x01h\x8ch\x85ub\x8c\x03log\x94h\x06\x8c\x03Log\x94\x93\x94)\x81\x94\x8c)results/qc_drops/aggregate_qc_drops.ipynb\x94a}\x94(h\x11}\x94\x8c\x08notebook\x94K\x00N\x86\x94sh\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bh\x9eh\x9bub\x8c\x06config\x94}\x94(\x8c\x10seqneut-pipeline\x94\x8c\x03../\x94\x8c\x04docs\x94\x8c\x07../docs\x94\x8c\x0bdescription\x94X\xba\x01\x00\x00# Test example for [seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nThis is a small toy-example created by subsetting a real experiment dataset.\n\nSee [https://github.com/jbloomlab/seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nfor the computer code and underlying numerical data.\n\nSee [here](https://github.com/jbloomlab/seqneut-pipeline/graphs/contributors) for a\nlist of all contributors to the pipeline.\n\x94\x8c\x0fviral_libraries\x94}\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94s\x8c\x17viral_strain_plot_order\x94\x8c 
data/viral_strain_plot_order.csv\x94\x8c\x12neut_standard_sets\x94}\x94\x8c\x08loes2023\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94s\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\x08upstream\x94\x8c\x1fCTCCCTACAATGTCGGATTTGTATTTAATAG\x94\x8c\ndownstream\x94\x8c\x00\x94\x8c\x04minq\x94K\x14\x8c\x11upstream_mismatch\x94K\x04\x8c\x0ebc_orientation\x94\x8c\x02R2\x94u\x8c#default_process_plate_qc_thresholds\x94}\x94(\x8c\x1bavg_barcode_counts_per_well\x94K\xfa\x8c\x1fmin_neut_standard_frac_per_well\x94G?tz\xe1G\xae\x14{\x8c"no_serum_per_viral_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?@bM\xd2\xf1\xa9\xfc\x8c\x0fmax_fold_change\x94K\x03\x8c\tmax_wells\x94K\x02u\x8c!per_neut_standard_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?tz\xe1G\xae\x14{\x8c\x0fmax_fold_change\x94K\x03\x8c\tmax_wells\x94K\x02u\x8c min_neut_standard_count_per_well\x94M\xf4\x01\x8c)min_no_serum_count_per_viral_barcode_well\x94K\x1e\x8c+max_frac_infectivity_per_viral_barcode_well\x94K\x05\x8c)min_dilutions_per_barcode_serum_replicate\x94K\x06u\x8c%default_process_plate_curvefit_params\x94}\x94(\x8c\x18frac_infectivity_ceiling\x94K\x01\x8c\x06fixtop\x94\x89\x8c\tfixbottom\x94K\x00u\x8c!default_process_plate_curvefit_qc\x94}\x94(\x8c\x1dmax_frac_infectivity_at_least\x94G?\xe0\x00\x00\x00\x00\x00\x00\x8c\x06min_R2\x94G?\xe9\x99\x99\x99\x99\x99\x9a\x8c#serum_replicates_ignore_curvefit_qc\x94]\x94\x8c+barcode_serum_replicates_ignore_curvefit_qc\x94]\x94u\x8c\x06plates\x94}\x94(h[}\x94(\x8c\x04date\x94\x8c\x08datetime\x94\x8c\x04date\x94\x93\x94C\x04\x07\xe7\x08\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate2_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xc7K\xfah\xc8G?tz\xe1G\xae\x14{h\xc9}\x94(h\xcbG?@bM\xd2\xf1\xa9\xfch\xccK\x03h\xcdK\x02uh\xce}\x94(h\xd0G?tz\xe1G\xae\x14{h\xd1K\x03h\xd2K\x02uh\xd3M\xf4\x01h\xd4K\x1eh\xd5
K\x05h\xd6K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xd9K\x01h\xda\x89h\xdbK\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xdeG?\xe0\x00\x00\x00\x00\x00\x00h\xdfG?\xe9\x99\x99\x99\x99\x99\x9ah\xe0h\xe1h\xe2h\xe3uuh\\}\x94(\x8c\x04date\x94h\xeaC\x04\x07\xe7\t\x1a\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate11_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94]\x94(\x8c\x10AGTCCTATCCTCAAAT\x94\x8c\x06M099d0\x94eas\x8c\rqc_thresholds\x94}\x94(h\xc7K\xfah\xc8G?tz\xe1G\xae\x14{h\xc9}\x94(h\xcbG?@bM\xd2\xf1\xa9\xfch\xccK\x03h\xcdK\x02uh\xce}\x94(h\xd0G?tz\xe1G\xae\x14{h\xd1K\x03h\xd2K\x02uh\xd3M\xf4\x01h\xd4K\x1eh\xd5K\x05h\xd6K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xd9K\x01h\xda\x89h\xdbK\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xdeG?\xe0\x00\x00\x00\x00\x00\x00h\xdfG?\xe9\x99\x99\x99\x99\x99\x9ah\xe0h\xe1h\xe2]\x94]\x94(\x8c\x10AGGTCAAGACCACAGG\x94\x8c\x06M099d0\x94eauuu\x8c\x16default_serum_titer_as\x94\x8c\x08midpoint\x94\x8c\x1bdefault_serum_qc_thresholds\x94}\x94(\x8c\x0emin_replicates\x94K\x02\x8c\x1bmax_fold_change_from_median\x94K\x03\x8c\x11viruses_ignore_qc\x94]\x94u\x8c\x16sera_override_defaults\x94}\x94(\x8c\x07M099d30\x94}\x94\x8c\rqc_thresholds\x94}\x94(j \x01\x00\x00K\x02j!\x01\x00\x00K\x03j"\x01\x00\x00]\x94\x8c\x14A/Belgium/H0017/2022\x94aus\x8c\x07Y044d30\x94}\x94(\x8c\rqc_thresholds\x94}\x94(j \x01\x00\x00K\x02j!\x01\x00\x00K\x04j"\x01\x00\x00j#\x01\x00\x00u\x8c\x08titer_as\x94\x8c\x04nt50\x94uuu\x8c\x04rule\x94\x8c\x12aggregate_qc_drops\x94\x8c\x0fbench_iteration\x94N\x8c\tscriptdir\x94\x8ct/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks\x94ub.'); from snakemake.logging import logger; logger.printshellcmds = False; import os; 
os.chdir(r'/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example');
+######## snakemake preamble end #########
+
+
+
+
+
+
+
+
+
+
+
+
+Aggregate and analyze the drops from QC-ing the plates and sera¶
+
+
+
+
+
+
+
+
+In [2]:
+
+
+import altair as alt
+
+import pandas as pd
+
+from ruamel.yaml import YAML
+
+yaml = YAML(typ="rt")  # "rt" selects ruamel.yaml's round-trip loader/dumper
+
+_ = alt.data_transformers.disable_max_rows()  # lift Altair's row cap; return value is discarded
+
+
+
+
+
+
+
+
+
+
+
+
+Get variables from snakemake
:
+
+
+
+
+
+
+
+
+In [3]:
+
+
+input_plate_qc_drops = snakemake.input.plate_qc_drops  # per-plate QC-drop YAML paths, opened one by one below
+input_sera_qc_drops = snakemake.input.sera_qc_drops  # presumably the per-serum QC-drop YAML paths; use not shown in this excerpt
+output_plate_qc_drops = snakemake.output.plate_qc_drops  # path the merged plate drops are written to
+output_sera_qc_drops = snakemake.output.sera_qc_drops  # presumably the path for merged sera drops; use not shown in this excerpt
+plates = snakemake.params.plates  # plate names, paired positionally with input_plate_qc_drops
+sera = snakemake.params.sera  # presumably serum names paired with input_sera_qc_drops -- confirm
+
+
+
+
+
+
+
+
+
+
+
+
+Analyze plate QC drops¶
Read QC drops for individual plates into a merged dictionary, write it to YAML, and also convert to a DataFrame.
+If you really want to look into the details of what is being dropped, you will want to look at that merged YAML file.
+
+
+
+
+
+
+
+
+In [4]:
+
+
+# read each plate's QC-drop YAML into a single dictionary keyed by plate name
+assert len(plates) == len(input_plate_qc_drops)  # zip() below would silently truncate on a length mismatch
+plate_qc_drops = {}
+for plate, qc_drops_yaml in zip(plates, input_plate_qc_drops):
+ with open(qc_drops_yaml) as f:
+ plate_qc_drops[plate] = yaml.load(f)
+assert len(plate_qc_drops) == len(input_plate_qc_drops)  # fails if a repeated plate name overwrote an entry
+
+print(f"Writing merged plate drops to {output_plate_qc_drops}")
+with open(output_plate_qc_drops, "w") as f:
+ yaml.dump(plate_qc_drops, stream=f)
+
+# flatten the nested dictionary (plate -> drop type -> drop -> reason) into one tuple per drop
+plate_qc_drop_tups = [
+ (plate_key, droptype_key, drop_key, reason)
+ for (plate_key, plate_val) in plate_qc_drops.items()
+ for droptype_key, droptype_val in plate_val.items()
+ for drop_key, reason in droptype_val.items()
+]
+
+# create data frame of QC drops with one row per flattened tuple
+plate_qc_drops_df = pd.DataFrame(
+ plate_qc_drop_tups,
+ columns=["plate", "drop type", "drop", "reason"],
+)
+
+
+
+
+
+
+
+
+
+
+
+
+Writing merged plate drops to results/qc_drops/plate_qc_drops.yml
+
+
+
+
+
+
+
+
+
+
+In [5]:
+
+
+plate_qc_drop_counts = plate_qc_drops_df.groupby(
+ ["plate", "drop type", "reason"], as_index=False
+).aggregate(n_drops=pd.NamedAgg("drop", "nunique"))  # distinct drops per (plate, drop type, reason)
+assert plate_qc_drop_counts["n_drops"].sum() == len(plate_qc_drops_df)  # holds only if no drop repeats within a group
+
+
+
+
+
+
+
+
+
+
+
+
+Now plot the number of drops for each plate.
+You should be worried about (and maybe re-do or discard) any plates with a very large number of drops:
+
+
+
+
+
+
+
+
+In [6]:
+
+
+plate_selection = alt.selection_point(fields=["plate"], on="mouseover", empty=False)  # selects the plate under the mouse pointer
+
+plate_qc_drop_counts_chart = (
+ alt.Chart(plate_qc_drop_counts)
+ .add_params(plate_selection)
+ .encode(
+ alt.X(
+ "n_drops",
+ title="number of drops",
+ ),
+ alt.Y(
+ "plate",
+ sort=plates,  # keep plates in the order given by the snakemake params
+ title=None,
+ axis=alt.Axis(labelFontStyle="bold", labelFontSize=11),
+ ),
+ alt.Column(  # one facet column per drop type
+ "drop type",
+ title=None,
+ spacing=5,
+ header=alt.Header(labelFontSize=12, labelFontStyle="bold", labelPadding=1),
+ ),
+ alt.Color(
+ "reason",
+ legend=alt.Legend(
+ orient="top", columns=1, labelLimit=230, title=None, padding=1
+ ),
+ ),
+ strokeWidth=alt.condition(plate_selection, alt.value(3), alt.value(0.5)),  # thicker outline for the selected plate
+ tooltip=plate_qc_drop_counts.columns.tolist(),  # show every column of the counts table on hover
+ )
+ .mark_bar(height={"band": 0.8}, stroke="black")
+ .properties(
+ width=230,
+ height=alt.Step(16),
+ title=alt.TitleParams(
+ "Number of QC drops when processing plates", anchor="middle", dy=-2
+ ),
+ )
+ .configure_axis(grid=False)
+ .resolve_scale(color="independent", x="independent")  # color and x scales are resolved independently across facets
+)
+
+plate_qc_drop_counts_chart  # bare expression so the notebook renders the chart as cell output
+
+
+
+
+
+
+
+
+
+
+Out[6]:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Look for barcodes dropped especially often in plate QC¶
If a barcode is dropped especially often across plates, that could indicate something problematic with that barcode such that it should be removed altogether from the library analysis.
+
+
+
+
+
+
+
+
+In [7]:
+
+
# Keep only drops whose type starts with "barcode". The barcode is taken as
# the first whitespace-delimited token of the drop label, then drops are
# tallied per (drop type, barcode).
barcode_drops = (
    plate_qc_drops_df.query("`drop type`.str.startswith('barcode')")
    .assign(barcode=lambda x: x["drop"].str.split().str[0])
    .groupby(["drop type", "barcode"], as_index=False)
    .aggregate(
        # number of distinct plates on which the barcode was dropped
        plates_where_dropped=pd.NamedAgg("plate", "nunique"),
        # total number of drop records for the barcode
        total_drops=pd.NamedAgg("plate", "count"),
    )
)

# Point selection projected on the barcode field and triggered on mouseover;
# used below to thicken the outline of the selected barcode's bars.
barcode_selection = alt.selection_point(fields=["barcode"], on="mouseover", empty=False)

barcode_drops_chart = (
    alt.Chart(barcode_drops)
    .add_params(barcode_selection)
    .encode(
        alt.X(
            "total_drops",
            title="times barcode dropped",
        ),
        alt.Y(
            "barcode",
            sort=alt.SortField("total_drops", order="descending"),
            axis=alt.Axis(labelFontSize=9),
        ),
        alt.Column(
            "drop type",
            title=None,
            spacing=8,
            header=alt.Header(labelFontSize=12, labelFontStyle="bold", labelPadding=1),
        ),
        strokeWidth=alt.condition(barcode_selection, alt.value(3), alt.value(0.5)),
        tooltip=barcode_drops.columns.tolist(),
    )
    .mark_bar(height={"band": 0.8}, stroke="black")
    .properties(
        width=200,
        height=alt.Step(10),
        # Title describes this per-barcode chart; it previously repeated the
        # per-plate chart's title.
        title=alt.TitleParams(
            "Number of times each barcode is dropped in plate QC",
            anchor="middle",
            dy=-2,
        ),
    )
    .configure_axis(grid=False)
    # Each drop-type facet gets its own x and y scales.
    .resolve_scale(color="independent", x="independent", y="independent")
)

barcode_drops_chart
+
+
+
+
+
+
+
+
+
+
+Out[7]:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Analyze the sera QC¶
Analyze the QC performed on the sera, which involves completely dropping titers for certain virus-sera pairs.
+Read the QC for different sera into a merged dictionary, write it to YAML, and also convert to a DataFrame.
+If you really want to look into the details of what is being dropped, you will want to look at that merged YAML file.
+
+
+
+
+
+
+
+
+In [8]:
+
+
# Each serum must pair with exactly one per-serum QC-drops YAML file.
n_sera_files = len(input_sera_qc_drops)
assert len(sera) == n_sera_files

# Load every per-serum YAML into one dictionary keyed by serum name.
sera_qc_drops = {}
for serum_name, yaml_path in zip(sera, input_sera_qc_drops):
    with open(yaml_path) as fin:
        sera_qc_drops[serum_name] = yaml.load(fin)
# Fewer keys than files would mean a serum name was repeated.
assert len(sera_qc_drops) == n_sera_files

print(f"Writing merged sera drops to {output_sera_qc_drops}")
with open(output_sera_qc_drops, "w") as fout:
    yaml.dump(sera_qc_drops, stream=fout)

# Flatten the nested mapping (serum -> virus -> reason) into one tuple
# per dropped virus.
sera_qc_drop_tups = []
for serum_name, reason_by_virus in sera_qc_drops.items():
    sera_qc_drop_tups.extend(
        (serum_name, virus_name, drop_reason)
        for virus_name, drop_reason in reason_by_virus.items()
    )

# One row per (serum, virus) drop.
sera_qc_drops_df = pd.DataFrame(
    sera_qc_drop_tups,
    columns=["serum", "virus", "reason"],
)
+
+
+
+
+
+
+
+
+
+
+
+
+Writing merged sera drops to results/qc_drops/sera_qc_drops.yml
+
+
+
+
+
+
+
+
+
+
+
+
+Plot the number of viruses dropped for each serum.
+If a serum has many missed viruses, then you will lack a lot of titers and so it may be worth reviewing the cause of the drops.
+
+
+
+
+
+
+
+
+In [9]:
+
+
# Count the distinct viruses dropped for each (serum, reason).
sera_n_drops = (
    sera_qc_drops_df
    .groupby(["serum", "reason"], as_index=False)
    .agg(n_viruses=("virus", "nunique"))
)
# Sanity check: the unique counts add up to the row count only when no
# virus is listed twice within a (serum, reason) group.
n_sera_drops_counted = sera_n_drops["n_viruses"].sum()
assert n_sera_drops_counted == len(sera_qc_drops_df)

# Encoding channels, built up front so the chart expression stays short.
sera_x = alt.X("n_viruses", title="number of viruses dropped")
sera_y = alt.Y("serum", sort=sera)  # same order as the `sera` list
sera_color = alt.Color(
    "reason",
    title="reason dropped",
    legend=alt.Legend(labelLimit=350),
)

# Horizontal bars of dropped-virus counts per serum, colored by reason.
sera_n_drops_chart = (
    alt.Chart(sera_n_drops)
    .encode(
        sera_x,
        sera_y,
        sera_color,
        tooltip=list(sera_n_drops.columns),
    )
    .mark_bar(height={"band": 0.8})
    .properties(
        width=250,
        height=alt.Step(13),
        title="Number of viruses dropped at serum QC for each serum",
    )
    .configure_axis(grid=False)
)

sera_n_drops_chart
+
+
+
+
+
+
+
+
+
+
+Out[9]:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Plot the number of sera for which each virus is dropped during serum QC.
+If a virus is dropped for many sera, that may indicate some issue with that virus in assays:
+
+
+
+
+
+
+
+
+In [10]:
+
+
# Count the distinct sera for which each (virus, reason) was dropped.
virus_n_drops = (
    sera_qc_drops_df
    .groupby(["virus", "reason"], as_index=False)
    .agg(n_sera=("serum", "nunique"))
)
# Sanity check: the unique counts add up to the row count only when no
# serum is listed twice within a (virus, reason) group.
n_virus_drops_counted = virus_n_drops["n_sera"].sum()
assert n_virus_drops_counted == len(sera_qc_drops_df)

# Encoding channels, built up front so the chart expression stays short.
virus_x = alt.X("n_sera", title="number of sera for which virus is dropped")
virus_y = alt.Y("virus", sort=alt.SortField("n_sera", order="descending"))
virus_color = alt.Color(
    "reason",
    title="reason dropped",
    legend=alt.Legend(labelLimit=350),
)

# Horizontal bars of serum counts per virus, colored by reason.
virus_n_drops_chart = (
    alt.Chart(virus_n_drops)
    .encode(
        virus_x,
        virus_y,
        virus_color,
        tooltip=list(virus_n_drops.columns),
    )
    .mark_bar(height={"band": 0.8})
    .properties(
        width=250,
        height=alt.Step(13),
        title="Number of sera for which each virus is dropped at serum QC",
    )
    .configure_axis(grid=False)
)

virus_n_drops_chart
+
+
+
+
+
+
+
+
+
+
+Out[10]:
+
+
+
+
+
+
+
+
+
+
+
+
+
+In [ ]:
+
+
+
+
+
+
+
+
+
+
+
+
######## snakemake preamble start (automatically inserted, do not edit) ########
+import sys; sys.path.extend(['/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example/..', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/bin', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/lib-dynload', '/home/jbloom/.local/lib/python3.11/site-packages', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/home/jbloom/.cache/snakemake/snakemake/source-cache/runtime-cache/tmpmyp1r1xp/file/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks']); import pickle; snakemake = pickle.loads(b'\x80\x04\x95\xaf\x11\x00\x00\x00\x00\x00\x00\x8c\x10snakemake.script\x94\x8c\tSnakemake\x94\x93\x94)\x81\x94}\x94(\x8c\x05input\x94\x8c\x0csnakemake.io\x94\x8c\nInputFiles\x94\x93\x94)\x81\x94(\x8c"results/plates/plate2/qc_drops.yml\x94\x8c#results/plates/plate11/qc_drops.yml\x94\x8c 
results/sera/M099d0/qc_drops.yml\x94\x8c!results/sera/M099d30/qc_drops.yml\x94\x8c!results/sera/Y044d30/qc_drops.yml\x94\x8c"results/sera/Y154d182/qc_drops.yml\x94e}\x94(\x8c\x06_names\x94}\x94(\x8c\x0eplate_qc_drops\x94K\x00K\x02\x86\x94\x8c\rsera_qc_drops\x94K\x02K\x06\x86\x94u\x8c\x12_allowed_overrides\x94]\x94(\x8c\x05index\x94\x8c\x04sort\x94eh\x19\x8c\tfunctools\x94\x8c\x07partial\x94\x93\x94h\x06\x8c\x19Namedlist._used_attribute\x94\x93\x94\x85\x94R\x94(h\x1f)}\x94\x8c\x05_name\x94h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bh\x13h\x06\x8c\tNamedlist\x94\x93\x94)\x81\x94(h\nh\x0be}\x94(h\x11}\x94h\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bubh\x15h*)\x81\x94(h\x0ch\rh\x0eh\x0fe}\x94(h\x11}\x94h\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bubub\x8c\x06output\x94h\x06\x8c\x0bOutputFiles\x94\x93\x94)\x81\x94(\x8c#results/qc_drops/plate_qc_drops.yml\x94\x8c"results/qc_drops/sera_qc_drops.yml\x94e}\x94(h\x11}\x94(h\x13K\x00N\x86\x94h\x15K\x01N\x86\x94uh\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bh\x13hGh\x15hHub\x8c\x06params\x94h\x06\x8c\x06Params\x94\x93\x94)\x81\x94(]\x94(\x8c\x06plate2\x94\x8c\x07plate11\x94e]\x94(\x8c\x06M099d0\x94\x8c\x07M099d30\x94\x8c\x07Y044d30\x94\x8c\x08Y154d182\x94ee}\x94(h\x11}\x94(\x8c\x06plates\x94K\x00N\x86\x94\x8c\x04sera\x94K\x01N\x86\x94uh\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bhdhZhfh]ub\x8c\twildcards\x94h\x06\x8c\tWildcards\x94\x93\x94)\x81\x94}\x94(h\x11}\x94h\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bub\x8c\x07threads\x94K\x01\x8c\tresources\x94h\x06\x8c\tResources\x94\x9
3\x94)\x81\x94(K\x01K\x01\x8c\x04/tmp\x94e}\x94(h\x11}\x94(\x8c\x06_cores\x94K\x00N\x86\x94\x8c\x06_nodes\x94K\x01N\x86\x94\x8c\x06tmpdir\x94K\x02N\x86\x94uh\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bh\x88K\x01h\x8aK\x01h\x8ch\x85ub\x8c\x03log\x94h\x06\x8c\x03Log\x94\x93\x94)\x81\x94\x8c)results/qc_drops/aggregate_qc_drops.ipynb\x94a}\x94(h\x11}\x94\x8c\x08notebook\x94K\x00N\x86\x94sh\x17]\x94(h\x19h\x1aeh\x19h\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x19sNt\x94bh\x1ah\x1dh\x1f\x85\x94R\x94(h\x1f)}\x94h#h\x1asNt\x94bh\x9eh\x9bub\x8c\x06config\x94}\x94(\x8c\x10seqneut-pipeline\x94\x8c\x03../\x94\x8c\x04docs\x94\x8c\x07../docs\x94\x8c\x0bdescription\x94X\xba\x01\x00\x00# Test example for [seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nThis is a small toy-example created by subsetting a real experiment dataset.\n\nSee [https://github.com/jbloomlab/seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nfor the computer code and underlying numerical data.\n\nSee [here](https://github.com/jbloomlab/seqneut-pipeline/graphs/contributors) for a\nlist of all contributors to the pipeline.\n\x94\x8c\x0fviral_libraries\x94}\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94s\x8c\x17viral_strain_plot_order\x94\x8c 
data/viral_strain_plot_order.csv\x94\x8c\x12neut_standard_sets\x94}\x94\x8c\x08loes2023\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94s\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\x08upstream\x94\x8c\x1fCTCCCTACAATGTCGGATTTGTATTTAATAG\x94\x8c\ndownstream\x94\x8c\x00\x94\x8c\x04minq\x94K\x14\x8c\x11upstream_mismatch\x94K\x04\x8c\x0ebc_orientation\x94\x8c\x02R2\x94u\x8c#default_process_plate_qc_thresholds\x94}\x94(\x8c\x1bavg_barcode_counts_per_well\x94K\xfa\x8c\x1fmin_neut_standard_frac_per_well\x94G?tz\xe1G\xae\x14{\x8c"no_serum_per_viral_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?@bM\xd2\xf1\xa9\xfc\x8c\x0fmax_fold_change\x94K\x03\x8c\tmax_wells\x94K\x02u\x8c!per_neut_standard_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?tz\xe1G\xae\x14{\x8c\x0fmax_fold_change\x94K\x03\x8c\tmax_wells\x94K\x02u\x8c min_neut_standard_count_per_well\x94M\xf4\x01\x8c)min_no_serum_count_per_viral_barcode_well\x94K\x1e\x8c+max_frac_infectivity_per_viral_barcode_well\x94K\x05\x8c)min_dilutions_per_barcode_serum_replicate\x94K\x06u\x8c%default_process_plate_curvefit_params\x94}\x94(\x8c\x18frac_infectivity_ceiling\x94K\x01\x8c\x06fixtop\x94\x89\x8c\tfixbottom\x94K\x00u\x8c!default_process_plate_curvefit_qc\x94}\x94(\x8c\x1dmax_frac_infectivity_at_least\x94G?\xe0\x00\x00\x00\x00\x00\x00\x8c\x06min_R2\x94G?\xe9\x99\x99\x99\x99\x99\x9a\x8c#serum_replicates_ignore_curvefit_qc\x94]\x94\x8c+barcode_serum_replicates_ignore_curvefit_qc\x94]\x94u\x8c\x06plates\x94}\x94(h[}\x94(\x8c\x04date\x94\x8c\x08datetime\x94\x8c\x04date\x94\x93\x94C\x04\x07\xe7\x08\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate2_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xc7K\xfah\xc8G?tz\xe1G\xae\x14{h\xc9}\x94(h\xcbG?@bM\xd2\xf1\xa9\xfch\xccK\x03h\xcdK\x02uh\xce}\x94(h\xd0G?tz\xe1G\xae\x14{h\xd1K\x03h\xd2K\x02uh\xd3M\xf4\x01h\xd4K\x1eh\xd5
K\x05h\xd6K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xd9K\x01h\xda\x89h\xdbK\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xdeG?\xe0\x00\x00\x00\x00\x00\x00h\xdfG?\xe9\x99\x99\x99\x99\x99\x9ah\xe0h\xe1h\xe2h\xe3uuh\\}\x94(\x8c\x04date\x94h\xeaC\x04\x07\xe7\t\x1a\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate11_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94]\x94(\x8c\x10AGTCCTATCCTCAAAT\x94\x8c\x06M099d0\x94eas\x8c\rqc_thresholds\x94}\x94(h\xc7K\xfah\xc8G?tz\xe1G\xae\x14{h\xc9}\x94(h\xcbG?@bM\xd2\xf1\xa9\xfch\xccK\x03h\xcdK\x02uh\xce}\x94(h\xd0G?tz\xe1G\xae\x14{h\xd1K\x03h\xd2K\x02uh\xd3M\xf4\x01h\xd4K\x1eh\xd5K\x05h\xd6K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xd9K\x01h\xda\x89h\xdbK\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xdeG?\xe0\x00\x00\x00\x00\x00\x00h\xdfG?\xe9\x99\x99\x99\x99\x99\x9ah\xe0h\xe1h\xe2]\x94]\x94(\x8c\x10AGGTCAAGACCACAGG\x94\x8c\x06M099d0\x94eauuu\x8c\x16default_serum_titer_as\x94\x8c\x08midpoint\x94\x8c\x1bdefault_serum_qc_thresholds\x94}\x94(\x8c\x0emin_replicates\x94K\x02\x8c\x1bmax_fold_change_from_median\x94K\x03\x8c\x11viruses_ignore_qc\x94]\x94u\x8c\x16sera_override_defaults\x94}\x94(\x8c\x07M099d30\x94}\x94\x8c\rqc_thresholds\x94}\x94(j \x01\x00\x00K\x02j!\x01\x00\x00K\x03j"\x01\x00\x00]\x94\x8c\x14A/Belgium/H0017/2022\x94aus\x8c\x07Y044d30\x94}\x94(\x8c\rqc_thresholds\x94}\x94(j \x01\x00\x00K\x02j!\x01\x00\x00K\x04j"\x01\x00\x00j#\x01\x00\x00u\x8c\x08titer_as\x94\x8c\x04nt50\x94uuu\x8c\x04rule\x94\x8c\x12aggregate_qc_drops\x94\x8c\x0fbench_iteration\x94N\x8c\tscriptdir\x94\x8ct/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks\x94ub.'); from snakemake.logging import logger; logger.printshellcmds = False; import os; 
os.chdir(r'/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example');
+######## snakemake preamble end #########
+
Aggregate and analyze the drops from QC-ing the plates and sera¶
+import altair as alt
+
+import pandas as pd
+
+from ruamel.yaml import YAML
+
# ruamel.yaml handler in round-trip ("rt") mode; the cells below use it for
# every YAML load and dump.
yaml = YAML(typ="rt")

# Turn off Altair's max-rows check on chart data; the return value is
# bound to the throwaway name `_`.
_ = alt.data_transformers.disable_max_rows()
+
Get variables from snakemake:
# Short aliases for the three snakemake namespaces read by this notebook.
smk_input = snakemake.input
smk_output = snakemake.output
smk_params = snakemake.params

# Paths of the per-plate and per-serum QC-drop YAML files to merge.
input_plate_qc_drops, input_sera_qc_drops = (
    smk_input.plate_qc_drops,
    smk_input.sera_qc_drops,
)

# Paths of the merged YAML files this notebook writes.
output_plate_qc_drops, output_sera_qc_drops = (
    smk_output.plate_qc_drops,
    smk_output.sera_qc_drops,
)

# Plate and serum names; each list is zipped with its input file list below.
plates, sera = smk_params.plates, smk_params.sera
+
Analyze plate QC drops¶
Read QC drops for individual plates into a merged dictionary, write it to YAML, and also convert to a DataFrame. If you really want to look into the details of what is being dropped, you will want to look at that merged YAML file.
# Each plate name must pair with exactly one per-plate QC-drops YAML file.
n_plate_files = len(input_plate_qc_drops)
assert len(plates) == n_plate_files

# Load every per-plate YAML into one dictionary keyed by plate name.
plate_qc_drops = {}
for plate_name, yaml_path in zip(plates, input_plate_qc_drops):
    with open(yaml_path) as fin:
        plate_qc_drops[plate_name] = yaml.load(fin)
# Fewer keys than files would mean a plate name was repeated.
assert len(plate_qc_drops) == n_plate_files

print(f"Writing merged plate drops to {output_plate_qc_drops}")
with open(output_plate_qc_drops, "w") as fout:
    yaml.dump(plate_qc_drops, stream=fout)

# Flatten the nested mapping (plate -> drop type -> drop -> reason)
# into one tuple per individual drop.
plate_qc_drop_tups = []
for plate_name, drops_by_type in plate_qc_drops.items():
    for drop_type, reason_by_drop in drops_by_type.items():
        plate_qc_drop_tups.extend(
            (plate_name, drop_type, dropped_item, drop_reason)
            for dropped_item, drop_reason in reason_by_drop.items()
        )

# One row per drop.
plate_qc_drops_df = pd.DataFrame(
    plate_qc_drop_tups,
    columns=["plate", "drop type", "drop", "reason"],
)
+
Writing merged plate drops to results/qc_drops/plate_qc_drops.yml ++
# Count the distinct dropped items for each (plate, drop type, reason).
plate_count_keys = ["plate", "drop type", "reason"]
plate_qc_drop_counts = (
    plate_qc_drops_df
    .groupby(plate_count_keys, as_index=False)
    .agg(n_drops=("drop", "nunique"))
)

# Sanity check: the per-group unique counts add up to the number of rows
# only when no drop is listed twice within a group.
n_plate_drops_counted = plate_qc_drop_counts["n_drops"].sum()
assert n_plate_drops_counted == len(plate_qc_drops_df)
+
Now plot the number of drops for each plate. You should be worried about (and perhaps re-do or discard) any plates with a very large number of drops:
# Point selection projected on the plate field and triggered on mouseover;
# used below to thicken the bar outlines of the selected plate.
plate_selection = alt.selection_point(fields=["plate"], on="mouseover", empty=False)

# Encoding channels, built up front so the chart expression stays short.
plate_x = alt.X("n_drops", title="number of drops")
plate_y = alt.Y(
    "plate",
    sort=plates,  # same order as the `plates` list
    title=None,
    axis=alt.Axis(labelFontStyle="bold", labelFontSize=11),
)
plate_facet = alt.Column(
    "drop type",
    title=None,
    spacing=5,
    header=alt.Header(labelFontSize=12, labelFontStyle="bold", labelPadding=1),
)
plate_color = alt.Color(
    "reason",
    legend=alt.Legend(
        orient="top", columns=1, labelLimit=230, title=None, padding=1
    ),
)
plate_stroke_width = alt.condition(plate_selection, alt.value(3), alt.value(0.5))
plate_chart_title = alt.TitleParams(
    "Number of QC drops when processing plates", anchor="middle", dy=-2
)

# Horizontal bars of drop counts per plate, one facet column per drop type.
plate_qc_drop_counts_chart = (
    alt.Chart(plate_qc_drop_counts)
    .add_params(plate_selection)
    .encode(
        plate_x,
        plate_y,
        plate_facet,
        plate_color,
        strokeWidth=plate_stroke_width,
        tooltip=list(plate_qc_drop_counts.columns),
    )
    .mark_bar(height={"band": 0.8}, stroke="black")
    .properties(width=230, height=alt.Step(16), title=plate_chart_title)
    .configure_axis(grid=False)
    .resolve_scale(color="independent", x="independent")
)

plate_qc_drop_counts_chart
+
Look for barcodes dropped especially often in plate QC¶
If a barcode is dropped especially often across plates, that could indicate something problematic with that barcode such that it should be removed altogether from the library analysis.
# Keep only drops whose type starts with "barcode". The barcode is taken as
# the first whitespace-delimited token of the drop label, then drops are
# tallied per (drop type, barcode).
barcode_drops = (
    plate_qc_drops_df.query("`drop type`.str.startswith('barcode')")
    .assign(barcode=lambda x: x["drop"].str.split().str[0])
    .groupby(["drop type", "barcode"], as_index=False)
    .aggregate(
        # number of distinct plates on which the barcode was dropped
        plates_where_dropped=pd.NamedAgg("plate", "nunique"),
        # total number of drop records for the barcode
        total_drops=pd.NamedAgg("plate", "count"),
    )
)

# Point selection projected on the barcode field and triggered on mouseover;
# used below to thicken the outline of the selected barcode's bars.
barcode_selection = alt.selection_point(fields=["barcode"], on="mouseover", empty=False)

barcode_drops_chart = (
    alt.Chart(barcode_drops)
    .add_params(barcode_selection)
    .encode(
        alt.X(
            "total_drops",
            title="times barcode dropped",
        ),
        alt.Y(
            "barcode",
            sort=alt.SortField("total_drops", order="descending"),
            axis=alt.Axis(labelFontSize=9),
        ),
        alt.Column(
            "drop type",
            title=None,
            spacing=8,
            header=alt.Header(labelFontSize=12, labelFontStyle="bold", labelPadding=1),
        ),
        strokeWidth=alt.condition(barcode_selection, alt.value(3), alt.value(0.5)),
        tooltip=barcode_drops.columns.tolist(),
    )
    .mark_bar(height={"band": 0.8}, stroke="black")
    .properties(
        width=200,
        height=alt.Step(10),
        # Title describes this per-barcode chart; it previously repeated the
        # per-plate chart's title.
        title=alt.TitleParams(
            "Number of times each barcode is dropped in plate QC",
            anchor="middle",
            dy=-2,
        ),
    )
    .configure_axis(grid=False)
    # Each drop-type facet gets its own x and y scales.
    .resolve_scale(color="independent", x="independent", y="independent")
)

barcode_drops_chart
+
Analyze the sera QC¶
Analyze the QC performed on the sera, which involves completely dropping titers for certain virus-sera pairs.
+Read the QC for different sera into a merged dictionary, write it to YAML, and also convert to a DataFrame. If you really want to look into the details of what is being dropped, you will want to look at that merged YAML file.
# Each serum must pair with exactly one per-serum QC-drops YAML file.
n_sera_files = len(input_sera_qc_drops)
assert len(sera) == n_sera_files

# Load every per-serum YAML into one dictionary keyed by serum name.
sera_qc_drops = {}
for serum_name, yaml_path in zip(sera, input_sera_qc_drops):
    with open(yaml_path) as fin:
        sera_qc_drops[serum_name] = yaml.load(fin)
# Fewer keys than files would mean a serum name was repeated.
assert len(sera_qc_drops) == n_sera_files

print(f"Writing merged sera drops to {output_sera_qc_drops}")
with open(output_sera_qc_drops, "w") as fout:
    yaml.dump(sera_qc_drops, stream=fout)

# Flatten the nested mapping (serum -> virus -> reason) into one tuple
# per dropped virus.
sera_qc_drop_tups = []
for serum_name, reason_by_virus in sera_qc_drops.items():
    sera_qc_drop_tups.extend(
        (serum_name, virus_name, drop_reason)
        for virus_name, drop_reason in reason_by_virus.items()
    )

# One row per (serum, virus) drop.
sera_qc_drops_df = pd.DataFrame(
    sera_qc_drop_tups,
    columns=["serum", "virus", "reason"],
)
+
Writing merged sera drops to results/qc_drops/sera_qc_drops.yml ++
Plot the number of viruses dropped for each serum. If a serum has many dropped viruses, then you will lack a lot of titers and so it may be worth reviewing the cause of the drops.
# Count the distinct viruses dropped for each (serum, reason).
sera_n_drops = (
    sera_qc_drops_df
    .groupby(["serum", "reason"], as_index=False)
    .agg(n_viruses=("virus", "nunique"))
)
# Sanity check: the unique counts add up to the row count only when no
# virus is listed twice within a (serum, reason) group.
n_sera_drops_counted = sera_n_drops["n_viruses"].sum()
assert n_sera_drops_counted == len(sera_qc_drops_df)

# Encoding channels, built up front so the chart expression stays short.
sera_x = alt.X("n_viruses", title="number of viruses dropped")
sera_y = alt.Y("serum", sort=sera)  # same order as the `sera` list
sera_color = alt.Color(
    "reason",
    title="reason dropped",
    legend=alt.Legend(labelLimit=350),
)

# Horizontal bars of dropped-virus counts per serum, colored by reason.
sera_n_drops_chart = (
    alt.Chart(sera_n_drops)
    .encode(
        sera_x,
        sera_y,
        sera_color,
        tooltip=list(sera_n_drops.columns),
    )
    .mark_bar(height={"band": 0.8})
    .properties(
        width=250,
        height=alt.Step(13),
        title="Number of viruses dropped at serum QC for each serum",
    )
    .configure_axis(grid=False)
)

sera_n_drops_chart
+
Plot the number of sera for which each virus is dropped during serum QC. If a virus is dropped for many sera, that may indicate some issue with that virus in assays:
# Count the distinct sera for which each (virus, reason) was dropped.
virus_n_drops = (
    sera_qc_drops_df
    .groupby(["virus", "reason"], as_index=False)
    .agg(n_sera=("serum", "nunique"))
)
# Sanity check: the unique counts add up to the row count only when no
# serum is listed twice within a (virus, reason) group.
n_virus_drops_counted = virus_n_drops["n_sera"].sum()
assert n_virus_drops_counted == len(sera_qc_drops_df)

# Encoding channels, built up front so the chart expression stays short.
virus_x = alt.X("n_sera", title="number of sera for which virus is dropped")
virus_y = alt.Y("virus", sort=alt.SortField("n_sera", order="descending"))
virus_color = alt.Color(
    "reason",
    title="reason dropped",
    legend=alt.Legend(labelLimit=350),
)

# Horizontal bars of serum counts per virus, colored by reason.
virus_n_drops_chart = (
    alt.Chart(virus_n_drops)
    .encode(
        virus_x,
        virus_y,
        virus_color,
        tooltip=list(virus_n_drops.columns),
    )
    .mark_bar(height={"band": 0.8})
    .properties(
        width=250,
        height=alt.Step(13),
        title="Number of sera for which each virus is dropped at serum QC",
    )
    .configure_axis(grid=False)
)

virus_n_drops_chart
+
+