######## snakemake preamble start (automatically inserted, do not edit) ########
-import sys; sys.path.extend(['/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example/..', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/bin', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/lib-dynload', '/home/jbloom/.local/lib/python3.11/site-packages', '/fh/fast/bloom_j/software/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/home/jbloom/.cache/snakemake/snakemake/source-cache/runtime-cache/tmp8en2sdjp/file/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks', '/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks']); import pickle; snakemake = 
pickle.loads(b'\x80\x04\x95a4\x00\x00\x00\x00\x00\x00\x8c\x10snakemake.script\x94\x8c\tSnakemake\x94\x93\x94)\x81\x94}\x94(\x8c\x05input\x94\x8c\x0csnakemake.io\x94\x8c\nInputFiles\x94\x93\x94)\x81\x94(\x8c*results/barcode_counts/plate11_none-10.csv\x94\x8c-results/barcode_counts/plate11_M099d30_20.csv\x94\x8c-results/barcode_counts/plate11_M099d30_60.csv\x94\x8c.results/barcode_counts/plate11_M099d30_180.csv\x94\x8c.results/barcode_counts/plate11_M099d30_540.csv\x94\x8c/results/barcode_counts/plate11_M099d30_1620.csv\x94\x8c/results/barcode_counts/plate11_M099d30_4860.csv\x94\x8c0results/barcode_counts/plate11_M099d30_14580.csv\x94\x8c0results/barcode_counts/plate11_M099d30_43740.csv\x94\x8c1results/barcode_counts/plate11_M099d30_131220.csv\x94\x8c1results/barcode_counts/plate11_M099d30_393660.csv\x94\x8c)results/barcode_counts/plate11_none-2.csv\x94\x8c*results/barcode_counts/plate11_none-11.csv\x94\x8c,results/barcode_counts/plate11_M099d0_20.csv\x94\x8c,results/barcode_counts/plate11_M099d0_60.csv\x94\x8c-results/barcode_counts/plate11_M099d0_180.csv\x94\x8c-results/barcode_counts/plate11_M099d0_540.csv\x94\x8c.results/barcode_counts/plate11_M099d0_1620.csv\x94\x8c.results/barcode_counts/plate11_M099d0_4860.csv\x94\x8c/results/barcode_counts/plate11_M099d0_14580.csv\x94\x8c/results/barcode_counts/plate11_M099d0_43740.csv\x94\x8c0results/barcode_counts/plate11_M099d0_131220.csv\x94\x8c0results/barcode_counts/plate11_M099d0_393660.csv\x94\x8c)results/barcode_counts/plate11_none-3.csv\x94\x8c*results/barcode_counts/plate11_none-12.csv\x94\x8c-results/barcode_counts/plate11_Y044d30_20.csv\x94\x8c-results/barcode_counts/plate11_Y044d30_60.csv\x94\x8c.results/barcode_counts/plate11_Y044d30_180.csv\x94\x8c.results/barcode_counts/plate11_Y044d30_540.csv\x94\x8c/results/barcode_counts/plate11_Y044d30_1620.csv\x94\x8c/results/barcode_counts/plate11_Y044d30_4860.csv\x94\x8c0results/barcode_counts/plate11_Y044d30_14580.csv\x94\x8c0results/barcode_counts/plate11_Y044d30_4374
0.csv\x94\x8c1results/barcode_counts/plate11_Y044d30_131220.csv\x94\x8c1results/barcode_counts/plate11_Y044d30_393660.csv\x94\x8c)results/barcode_counts/plate11_none-4.csv\x94\x8c)results/barcode_fates/plate11_none-10.csv\x94\x8c,results/barcode_fates/plate11_M099d30_20.csv\x94\x8c,results/barcode_fates/plate11_M099d30_60.csv\x94\x8c-results/barcode_fates/plate11_M099d30_180.csv\x94\x8c-results/barcode_fates/plate11_M099d30_540.csv\x94\x8c.results/barcode_fates/plate11_M099d30_1620.csv\x94\x8c.results/barcode_fates/plate11_M099d30_4860.csv\x94\x8c/results/barcode_fates/plate11_M099d30_14580.csv\x94\x8c/results/barcode_fates/plate11_M099d30_43740.csv\x94\x8c0results/barcode_fates/plate11_M099d30_131220.csv\x94\x8c0results/barcode_fates/plate11_M099d30_393660.csv\x94\x8c(results/barcode_fates/plate11_none-2.csv\x94\x8c)results/barcode_fates/plate11_none-11.csv\x94\x8c+results/barcode_fates/plate11_M099d0_20.csv\x94\x8c+results/barcode_fates/plate11_M099d0_60.csv\x94\x8c,results/barcode_fates/plate11_M099d0_180.csv\x94\x8c,results/barcode_fates/plate11_M099d0_540.csv\x94\x8c-results/barcode_fates/plate11_M099d0_1620.csv\x94\x8c-results/barcode_fates/plate11_M099d0_4860.csv\x94\x8c.results/barcode_fates/plate11_M099d0_14580.csv\x94\x8c.results/barcode_fates/plate11_M099d0_43740.csv\x94\x8c/results/barcode_fates/plate11_M099d0_131220.csv\x94\x8c/results/barcode_fates/plate11_M099d0_393660.csv\x94\x8c(results/barcode_fates/plate11_none-3.csv\x94\x8c)results/barcode_fates/plate11_none-12.csv\x94\x8c,results/barcode_fates/plate11_Y044d30_20.csv\x94\x8c,results/barcode_fates/plate11_Y044d30_60.csv\x94\x8c-results/barcode_fates/plate11_Y044d30_180.csv\x94\x8c-results/barcode_fates/plate11_Y044d30_540.csv\x94\x8c.results/barcode_fates/plate11_Y044d30_1620.csv\x94\x8c.results/barcode_fates/plate11_Y044d30_4860.csv\x94\x8c/results/barcode_fates/plate11_Y044d30_14580.csv\x94\x8c/results/barcode_fates/plate11_Y044d30_43740.csv\x94\x8c0results/barcode_fates/plate11_Y044d30_131220.c
sv\x94\x8c0results/barcode_fates/plate11_Y044d30_393660.csv\x94\x8c(results/barcode_fates/plate11_none-4.csv\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94e}\x94(\x8c\x06_names\x94}\x94(\x8c\ncount_csvs\x94K\x00K$\x86\x94\x8c\tfate_csvs\x94K$KH\x86\x94\x8c\x11viral_library_csv\x94KHN\x86\x94\x8c\x15neut_standard_set_csv\x94KIN\x86\x94u\x8c\x12_allowed_overrides\x94]\x94(\x8c\x05index\x94\x8c\x04sort\x94eha\x8c\tfunctools\x94\x8c\x07partial\x94\x93\x94h\x06\x8c\x19Namedlist._used_attribute\x94\x93\x94\x85\x94R\x94(hg)}\x94\x8c\x05_name\x94hasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bhWh\x06\x8c\tNamedlist\x94\x93\x94)\x81\x94(h\nh\x0bh\x0ch\rh\x0eh\x0fh\x10h\x11h\x12h\x13h\x14h\x15h\x16h\x17h\x18h\x19h\x1ah\x1bh\x1ch\x1dh\x1eh\x1fh h!h"h#h$h%h&h\'h(h)h*h+h,h-e}\x94(hU}\x94h_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bubhYhr)\x81\x94(h.h/h0h1h2h3h4h5h6h7h8h9h:h;h<h=h>h?h@hAhBhChDhEhFhGhHhIhJhKhLhMhNhOhPhQe}\x94(hU}\x94h_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bubh[hRh]hSub\x8c\x06output\x94h\x06\x8c\x0bOutputFiles\x94\x93\x94)\x81\x94(\x8c5results/plates/plate11/process_counts_qc_failures.txt\x94\x8c+results/plates/plate11/frac_infectivity.csv\x94e}\x94(hU}\x94(\x8c\x0bqc_failures\x94K\x00N\x86\x94\x8c\x14frac_infectivity_csv\x94K\x01N\x86\x94uh_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bh\x93h\x8fh\x95h\x90ub\x8c\x06params\x94h\x06\x8c\x06Params\x94\x93\x94)\x81\x94(]\x94(\x8c\x0fplate11_none-10\x94\x8c\x12plate11_M099d30_20\x94\x8c\x12plate11_M099d30_60\x94\x8c\x13plate11_M099d30_180\x94\x8c\x13plate11_M099d30_540\x94\x8c\x14plate11_M099d30_1620\x94\x8c\x14plate11_M099d30_4860\x94\x8c\x15plate11_M099d30_14580\x94\x8c\x15plate11_M099d30_43740\x94\x8c\x16plate11_M099d30_131220\x94\x8c\x16plate11_M099d30_393660\x94\x8c\x0eplate11_none-2\x94\x8c\x0fplat
e11_none-11\x94\x8c\x11plate11_M099d0_20\x94\x8c\x11plate11_M099d0_60\x94\x8c\x12plate11_M099d0_180\x94\x8c\x12plate11_M099d0_540\x94\x8c\x13plate11_M099d0_1620\x94\x8c\x13plate11_M099d0_4860\x94\x8c\x14plate11_M099d0_14580\x94\x8c\x14plate11_M099d0_43740\x94\x8c\x15plate11_M099d0_131220\x94\x8c\x15plate11_M099d0_393660\x94\x8c\x0eplate11_none-3\x94\x8c\x0fplate11_none-12\x94\x8c\x12plate11_Y044d30_20\x94\x8c\x12plate11_Y044d30_60\x94\x8c\x13plate11_Y044d30_180\x94\x8c\x13plate11_Y044d30_540\x94\x8c\x14plate11_Y044d30_1620\x94\x8c\x14plate11_Y044d30_4860\x94\x8c\x15plate11_Y044d30_14580\x94\x8c\x15plate11_Y044d30_43740\x94\x8c\x16plate11_Y044d30_131220\x94\x8c\x16plate11_Y044d30_393660\x94\x8c\x0eplate11_none-4\x94e}\x94(\x8c\x04date\x94\x8c\n2023-09-26\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate11_samples.csv\x94\x8c\x1cprocess_counts_qc_thresholds\x94}\x94(\x8c\x12avg_barcode_counts\x94K\xfa\x8c\x16min_neut_standard_frac\x94G?tz\xe1G\xae\x14{\x8c\x1fmax_neut_standard_frac_no_serum\x94G?\xb9\x99\x99\x99\x99\x99\x9a\x8c\x18barcode_frac_consistency\x94K\x04\x8c\x16min_viral_barcode_frac\x94G?@bM\xd2\xf1\xa9\xfc\x8c\x1emin_neut_standard_barcode_frac\x94G?tz\xe1G\xae\x14{\x8c\x17min_neut_standard_count\x94M^\x01\x8c 
min_no_serum_viral_barcode_count\x94K\x05\x8c!min_dilutions_per_serum_replicate\x94K\x04\x8c\x14max_frac_infectivity\x94K\x08u\x8c\x10barcodes_to_drop\x94]\x94\x8c\x10GGTCCATCTCAGATCG\x94a\x8c\rwells_to_drop\x94]\x94(\x8c\x02D6\x94\x8c\x03C12\x94e\x8c\x07samples\x94}\x94(\x8c\x04well\x94}\x94(K\x00\x8c\x02B1\x94K\x01\x8c\x02B2\x94K\x02\x8c\x02B3\x94K\x03\x8c\x02B4\x94K\x04\x8c\x02B5\x94K\x05\x8c\x02B6\x94K\x06\x8c\x02B7\x94K\x07\x8c\x02B8\x94K\x08\x8c\x02B9\x94K\t\x8c\x03B10\x94K\n\x8c\x03B11\x94K\x0b\x8c\x03B12\x94K\x0c\x8c\x02C1\x94K\r\x8c\x02C2\x94K\x0e\x8c\x02C3\x94K\x0f\x8c\x02C4\x94K\x10\x8c\x02C5\x94K\x11\x8c\x02C6\x94K\x12\x8c\x02C7\x94K\x13\x8c\x02C8\x94K\x14\x8c\x02C9\x94K\x15\x8c\x03C10\x94K\x16\x8c\x03C11\x94K\x17\x8c\x03C12\x94K\x18\x8c\x02D1\x94K\x19\x8c\x02D2\x94K\x1a\x8c\x02D3\x94K\x1b\x8c\x02D4\x94K\x1c\x8c\x02D5\x94K\x1d\x8c\x02D6\x94K\x1e\x8c\x02D7\x94K\x1f\x8c\x02D8\x94K \x8c\x02D9\x94K!\x8c\x03D10\x94K"\x8c\x03D11\x94K#\x8c\x03D12\x94u\x8c\x05serum\x94}\x94(K\x00\x8c\x04none\x94K\x01\x8c\x07M099d30\x94K\x02j\x10\x01\x00\x00K\x03j\x10\x01\x00\x00K\x04j\x10\x01\x00\x00K\x05j\x10\x01\x00\x00K\x06j\x10\x01\x00\x00K\x07j\x10\x01\x00\x00K\x08j\x10\x01\x00\x00K\tj\x10\x01\x00\x00K\nj\x10\x01\x00\x00K\x0bj\x0f\x01\x00\x00K\x0cj\x0f\x01\x00\x00K\r\x8c\x06M099d0\x94K\x0ej\x11\x01\x00\x00K\x0fj\x11\x01\x00\x00K\x10j\x11\x01\x00\x00K\x11j\x11\x01\x00\x00K\x12j\x11\x01\x00\x00K\x13j\x11\x01\x00\x00K\x14j\x11\x01\x00\x00K\x15j\x11\x01\x00\x00K\x16j\x11\x01\x00\x00K\x17j\x0f\x01\x00\x00K\x18j\x0f\x01\x00\x00K\x19\x8c\x07Y044d30\x94K\x1aj\x12\x01\x00\x00K\x1bj\x12\x01\x00\x00K\x1cj\x12\x01\x00\x00K\x1dj\x12\x01\x00\x00K\x1ej\x12\x01\x00\x00K\x1fj\x12\x01\x00\x00K 
j\x12\x01\x00\x00K!j\x12\x01\x00\x00K"j\x12\x01\x00\x00K#j\x0f\x01\x00\x00u\x8c\x0fdilution_factor\x94}\x94(K\x00NK\x01K\x14K\x02K<K\x03K\xb4K\x04M\x1c\x02K\x05MT\x06K\x06M\xfc\x12K\x07M\xf48K\x08M\xdc\xaaK\tJ\x94\x00\x02\x00K\nJ\xbc\x01\x06\x00K\x0bNK\x0cNK\rK\x14K\x0eK<K\x0fK\xb4K\x10M\x1c\x02K\x11MT\x06K\x12M\xfc\x12K\x13M\xf48K\x14M\xdc\xaaK\x15J\x94\x00\x02\x00K\x16J\xbc\x01\x06\x00K\x17NK\x18NK\x19K\x14K\x1aK<K\x1bK\xb4K\x1cM\x1c\x02K\x1dMT\x06K\x1eM\xfc\x12K\x1fM\xf48K M\xdc\xaaK!J\x94\x00\x02\x00K"J\xbc\x01\x06\x00K#Nu\x8c\treplicate\x94}\x94(K\x00K\nK\x01K\x02K\x02K\x02K\x03K\x02K\x04K\x02K\x05K\x02K\x06K\x02K\x07K\x02K\x08K\x02K\tK\x02K\nK\x02K\x0bK\x02K\x0cK\x0bK\rK\x02K\x0eK\x02K\x0fK\x02K\x10K\x02K\x11K\x02K\x12K\x02K\x13K\x02K\x14K\x02K\x15K\x02K\x16K\x02K\x17K\x03K\x18K\x0cK\x19K\x02K\x1aK\x02K\x1bK\x02K\x1cK\x02K\x1dK\x02K\x1eK\x02K\x1fK\x02K K\x02K!K\x02K"K\x02K#K\x04u\x8c\x05fastq\x94}\x94(K\x00\x8c*fastqs/Plate11_Noserum2_S2_R1_001.fastq.gz\x94K\x01\x8c)fastqs/M099_d30_conc1_S10_R1_001.fastq.gz\x94K\x02\x8c)fastqs/M099_d30_conc2_S18_R1_001.fastq.gz\x94K\x03\x8c)fastqs/M099_d30_conc3_S26_R1_001.fastq.gz\x94K\x04\x8c)fastqs/M099_d30_conc4_S34_R1_001.fastq.gz\x94K\x05\x8c)fastqs/M099_d30_conc5_S42_R1_001.fastq.gz\x94K\x06\x8c)fastqs/M099_d30_conc6_S50_R1_001.fastq.gz\x94K\x07\x8c)fastqs/M099_d30_conc7_S58_R1_001.fastq.gz\x94K\x08\x8c)fastqs/M099_d30_conc8_S66_R1_001.fastq.gz\x94K\t\x8c)fastqs/M099_d30_conc9_S74_R1_001.fastq.gz\x94K\n\x8c*fastqs/M099_d30_conc10_S82_R1_001.fastq.gz\x94K\x0b\x8c,fastqs/Plate11_Noserum10_S90_R1_001.fastq.gz\x94K\x0c\x8c*fastqs/Plate11_Noserum3_S3_R1_001.fastq.gz\x94K\r\x8c(fastqs/M099_d0_conc1_S11_R1_001.fastq.gz\x94K\x0e\x8c(fastqs/M099_d0_conc2_S19_R1_001.fastq.gz\x94K\x0f\x8c(fastqs/M099_d0_conc3_S27_R1_001.fastq.gz\x94K\x10\x8c(fastqs/M099_d0_conc4_S35_R1_001.fastq.gz\x94K\x11\x8c(fastqs/M099_d0_conc5_S43_R1_001.fastq.gz\x94K\x12\x8c(fastqs/M099_d0_conc6_S51_R1_001.fastq.gz\x94K\x13\x8c(fastqs/M099_d0_conc7_S59_R1_00
1.fastq.gz\x94K\x14\x8c(fastqs/M099_d0_conc8_S67_R1_001.fastq.gz\x94K\x15\x8c(fastqs/M099_d0_conc9_S75_R1_001.fastq.gz\x94K\x16\x8c)fastqs/M099_d0_conc10_S83_R1_001.fastq.gz\x94K\x17\x8c,fastqs/Plate11_Noserum11_S91_R1_001.fastq.gz\x94K\x18\x8c*fastqs/Plate11_Noserum4_S4_R1_001.fastq.gz\x94K\x19\x8c)fastqs/Y044_d30_conc1_S12_R1_001.fastq.gz\x94K\x1a\x8c)fastqs/Y044_d30_conc2_S20_R1_001.fastq.gz\x94K\x1b\x8c)fastqs/Y044_d30_conc3_S28_R1_001.fastq.gz\x94K\x1c\x8c)fastqs/Y044_d30_conc4_S36_R1_001.fastq.gz\x94K\x1d\x8c)fastqs/Y044_d30_conc5_S44_R1_001.fastq.gz\x94K\x1e\x8c)fastqs/Y044_d30_conc6_S52_R1_001.fastq.gz\x94K\x1f\x8c)fastqs/Y044_d30_conc7_S60_R1_001.fastq.gz\x94K \x8c)fastqs/Y044_d30_conc8_S68_R1_001.fastq.gz\x94K!\x8c)fastqs/Y044_d30_conc9_S76_R1_001.fastq.gz\x94K"\x8c*fastqs/Y044_d30_conc10_S84_R1_001.fastq.gz\x94K#\x8c,fastqs/Plate11_Noserum12_S92_R1_001.fastq.gz\x94u\x8c\x0fserum_replicate\x94}\x94(K\x00\x8c\x07none-10\x94K\x01j\x10\x01\x00\x00K\x02j\x10\x01\x00\x00K\x03j\x10\x01\x00\x00K\x04j\x10\x01\x00\x00K\x05j\x10\x01\x00\x00K\x06j\x10\x01\x00\x00K\x07j\x10\x01\x00\x00K\x08j\x10\x01\x00\x00K\tj\x10\x01\x00\x00K\nj\x10\x01\x00\x00K\x0b\x8c\x06none-2\x94K\x0c\x8c\x07none-11\x94K\rj\x11\x01\x00\x00K\x0ej\x11\x01\x00\x00K\x0fj\x11\x01\x00\x00K\x10j\x11\x01\x00\x00K\x11j\x11\x01\x00\x00K\x12j\x11\x01\x00\x00K\x13j\x11\x01\x00\x00K\x14j\x11\x01\x00\x00K\x15j\x11\x01\x00\x00K\x16j\x11\x01\x00\x00K\x17\x8c\x06none-3\x94K\x18\x8c\x07none-12\x94K\x19j\x12\x01\x00\x00K\x1aj\x12\x01\x00\x00K\x1bj\x12\x01\x00\x00K\x1cj\x12\x01\x00\x00K\x1dj\x12\x01\x00\x00K\x1ej\x12\x01\x00\x00K\x1fj\x12\x01\x00\x00K 
j\x12\x01\x00\x00K!j\x12\x01\x00\x00K"j\x12\x01\x00\x00K#\x8c\x06none-4\x94u\x8c\x0esample_noplate\x94}\x94(K\x00j?\x01\x00\x00K\x01\x8c\nM099d30_20\x94K\x02\x8c\nM099d30_60\x94K\x03\x8c\x0bM099d30_180\x94K\x04\x8c\x0bM099d30_540\x94K\x05\x8c\x0cM099d30_1620\x94K\x06\x8c\x0cM099d30_4860\x94K\x07\x8c\rM099d30_14580\x94K\x08\x8c\rM099d30_43740\x94K\t\x8c\x0eM099d30_131220\x94K\n\x8c\x0eM099d30_393660\x94K\x0bj@\x01\x00\x00K\x0cjA\x01\x00\x00K\r\x8c\tM099d0_20\x94K\x0e\x8c\tM099d0_60\x94K\x0f\x8c\nM099d0_180\x94K\x10\x8c\nM099d0_540\x94K\x11\x8c\x0bM099d0_1620\x94K\x12\x8c\x0bM099d0_4860\x94K\x13\x8c\x0cM099d0_14580\x94K\x14\x8c\x0cM099d0_43740\x94K\x15\x8c\rM099d0_131220\x94K\x16\x8c\rM099d0_393660\x94K\x17jB\x01\x00\x00K\x18jC\x01\x00\x00K\x19\x8c\nY044d30_20\x94K\x1a\x8c\nY044d30_60\x94K\x1b\x8c\x0bY044d30_180\x94K\x1c\x8c\x0bY044d30_540\x94K\x1d\x8c\x0cY044d30_1620\x94K\x1e\x8c\x0cY044d30_4860\x94K\x1f\x8c\rY044d30_14580\x94K \x8c\rY044d30_43740\x94K!\x8c\x0eY044d30_131220\x94K"\x8c\x0eY044d30_393660\x94K#jD\x01\x00\x00u\x8c\x06sample\x94}\x94(K\x00h\xa5K\x01h\xa6K\x02h\xa7K\x03h\xa8K\x04h\xa9K\x05h\xaaK\x06h\xabK\x07h\xacK\x08h\xadK\th\xaeK\nh\xafK\x0bh\xb0K\x0ch\xb1K\rh\xb2K\x0eh\xb3K\x0fh\xb4K\x10h\xb5K\x11h\xb6K\x12h\xb7K\x13h\xb8K\x14h\xb9K\x15h\xbaK\x16h\xbbK\x17h\xbcK\x18h\xbdK\x19h\xbeK\x1ah\xbfK\x1bh\xc0K\x1ch\xc1K\x1dh\xc2K\x1eh\xc3K\x1fh\xc4K 
h\xc5K!h\xc6K"h\xc7K#h\xc8u\x8c\x05plate\x94}\x94(K\x00\x8c\x07plate11\x94K\x01ji\x01\x00\x00K\x02ji\x01\x00\x00K\x03ji\x01\x00\x00K\x04ji\x01\x00\x00K\x05ji\x01\x00\x00K\x06ji\x01\x00\x00K\x07ji\x01\x00\x00K\x08ji\x01\x00\x00K\tji\x01\x00\x00K\nji\x01\x00\x00K\x0bji\x01\x00\x00K\x0cji\x01\x00\x00K\rji\x01\x00\x00K\x0eji\x01\x00\x00K\x0fji\x01\x00\x00K\x10ji\x01\x00\x00K\x11ji\x01\x00\x00K\x12ji\x01\x00\x00K\x13ji\x01\x00\x00K\x14ji\x01\x00\x00K\x15ji\x01\x00\x00K\x16ji\x01\x00\x00K\x17ji\x01\x00\x00K\x18ji\x01\x00\x00K\x19ji\x01\x00\x00K\x1aji\x01\x00\x00K\x1bji\x01\x00\x00K\x1cji\x01\x00\x00K\x1dji\x01\x00\x00K\x1eji\x01\x00\x00K\x1fji\x01\x00\x00K ji\x01\x00\x00K!ji\x01\x00\x00K"ji\x01\x00\x00K#ji\x01\x00\x00u\x8c\x0fplate_replicate\x94}\x94(K\x00\x8c\nplate11-10\x94K\x01ji\x01\x00\x00K\x02ji\x01\x00\x00K\x03ji\x01\x00\x00K\x04ji\x01\x00\x00K\x05ji\x01\x00\x00K\x06ji\x01\x00\x00K\x07ji\x01\x00\x00K\x08ji\x01\x00\x00K\tji\x01\x00\x00K\nji\x01\x00\x00K\x0b\x8c\tplate11-2\x94K\x0c\x8c\nplate11-11\x94K\rji\x01\x00\x00K\x0eji\x01\x00\x00K\x0fji\x01\x00\x00K\x10ji\x01\x00\x00K\x11ji\x01\x00\x00K\x12ji\x01\x00\x00K\x13ji\x01\x00\x00K\x14ji\x01\x00\x00K\x15ji\x01\x00\x00K\x16ji\x01\x00\x00K\x17\x8c\tplate11-3\x94K\x18\x8c\nplate11-12\x94K\x19ji\x01\x00\x00K\x1aji\x01\x00\x00K\x1bji\x01\x00\x00K\x1cji\x01\x00\x00K\x1dji\x01\x00\x00K\x1eji\x01\x00\x00K\x1fji\x01\x00\x00K 
ji\x01\x00\x00K!ji\x01\x00\x00K"ji\x01\x00\x00K#\x8c\tplate11-4\x94uuue}\x94(hU}\x94(h\xe5K\x00N\x86\x94\x8c\x0cplate_params\x94K\x01N\x86\x94uh_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bh\xe5h\xa4ju\x01\x00\x00h\xc9ub\x8c\twildcards\x94h\x06\x8c\tWildcards\x94\x93\x94)\x81\x94\x8c\x07plate11\x94a}\x94(hU}\x94\x8c\x05plate\x94K\x00N\x86\x94sh_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bjg\x01\x00\x00j\x84\x01\x00\x00ub\x8c\x07threads\x94K\x01\x8c\tresources\x94h\x06\x8c\tResources\x94\x93\x94)\x81\x94(K\x01K\x01\x8c\x04/tmp\x94e}\x94(hU}\x94(\x8c\x06_cores\x94K\x00N\x86\x94\x8c\x06_nodes\x94K\x01N\x86\x94\x8c\x06tmpdir\x94K\x02N\x86\x94uh_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bj\x9a\x01\x00\x00K\x01j\x9c\x01\x00\x00K\x01j\x9e\x01\x00\x00j\x97\x01\x00\x00ub\x8c\x03log\x94h\x06\x8c\x03Log\x94\x93\x94)\x81\x94\x8c3results/plates/plate11/process_counts_plate11.ipynb\x94a}\x94(hU}\x94\x8c\x08notebook\x94K\x00N\x86\x94sh_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bj\xb0\x01\x00\x00j\xad\x01\x00\x00ub\x8c\x06config\x94}\x94(\x8c\x10seqneut-pipeline\x94\x8c\x03../\x94\x8c\x04docs\x94\x8c\x07../docs\x94\x8c\x0bdescription\x94X\xba\x01\x00\x00# Test example for [seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nThis is a small toy-example created by subsetting a real experiment dataset.\n\nSee [https://github.com/jbloomlab/seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nfor the computer code and underlying numerical data.\n\nSee [here](https://github.com/jbloomlab/seqneut-pipeline/graphs/contributors) for a\nlist of all contributors to the pipeline.\n\x94\x8c\x0fviral_libraries\x94}\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94s\x8c\x17viral_strain_plot_order\x94\x8c 
data/viral_strain_plot_order.csv\x94\x8c\x12neut_standard_sets\x94}\x94\x8c\x08loes2023\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94s\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\x08upstream\x94\x8c\x1fCTCCCTACAATGTCGGATTTGTATTTAATAG\x94\x8c\ndownstream\x94\x8c\x00\x94\x8c\x04minq\x94K\x14\x8c\x11upstream_mismatch\x94K\x04\x8c\x0ebc_orientation\x94\x8c\x02R2\x94u\x8c$default_process_counts_qc_thresholds\x94}\x94(h\xd4K\xfah\xd5G?tz\xe1G\xae\x14{h\xd6G?\xb9\x99\x99\x99\x99\x99\x9ah\xd7K\x04h\xd8G?@bM\xd2\xf1\xa9\xfch\xd9G?tz\xe1G\xae\x14{h\xdaM^\x01h\xdbK\x05h\xdcK\x04h\xddK\x08u\x8c\x17default_curvefit_params\x94}\x94(\x8c\x18frac_infectivity_ceiling\x94K\x01\x8c\x06fixtop\x94\x89\x8c\tfixbottom\x94K\x00u\x8c\x06plates\x94}\x94(\x8c\x06plate2\x94}\x94(\x8c\x04date\x94\x8c\x08datetime\x94\x8c\x04date\x94\x93\x94C\x04\x07\xe7\x08\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate2_samples.csv\x94\x8c\x1cprocess_counts_qc_thresholds\x94}\x94(h\xd4K\xfah\xd5G?tz\xe1G\xae\x14{h\xd6G?\xb9\x99\x99\x99\x99\x99\x9ah\xd7K\x04h\xd8G?@bM\xd2\xf1\xa9\xfch\xd9G?tz\xe1G\xae\x14{h\xdaM^\x01h\xdbK\x05h\xdcK\x04h\xddK\x08u\x8c\x10barcodes_to_drop\x94]\x94\x8c\x10GGTCCATCTCAGATCG\x94a\x8c\rwells_to_drop\x94]\x94(\x8c\x02C1\x94\x8c\x03D12\x94e\x8c\x0fcurvefit_params\x94}\x94(j\xdb\x01\x00\x00K\x01j\xdc\x01\x00\x00\x89j\xdd\x01\x00\x00K\x00uuji\x01\x00\x00}\x94(h\xcaj\xe5\x01\x00\x00C\x04\x07\xe7\t\x1a\x94\x85\x94R\x94h\xcch\xcdh\xceh\xcfh\xd0h\xd1h\xd2}\x94(h\xd4K\xfah\xd5G?tz\xe1G\xae\x14{h\xd6G?\xb9\x99\x99\x99\x99\x99\x9ah\xd7K\x04h\xd8G?@bM\xd2\xf1\xa9\xfch\xd9G?tz\xe1G\xae\x14{h\xdaM^\x01h\xdbK\x05h\xdcK\x04h\xddK\x08uh\xde]\x94h\xe0ah\xe1]\x94(h\xe3h\xe4e\x8c\x0fcurvefit_params\x94}\x94(j\xdb\x01\x00\x00K\x01j\xdc\x01\x00\x00\x89j\xdd\x01\x00\x00K\x00uuu\x8c\x1aserum_titers_qc_thresholds\x94}\x94(\x8c\x14min_frac_infectivity\x94G?\xe6
ffffff\x8c\x0emin_replicates\x94K\x02\x8c\x1bmax_fold_change_from_median\x94K\x03u\x8c\x1aserum_titers_qc_exclusions\x94}\x94(\x8c\x06M099d0\x94}\x94(\x8c\x16A/Bangladesh/8002/2021\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x12A/Brisbane/02/2018\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x13A/Norway/25089/2022\x94}\x94\x8c\x12replicates_to_drop\x94]\x94\x8c\x18plate11-CGGATAAAAATGATAT\x94as\x8c\x14A/Wisconsin/588/2019\x94}\x94\x8c\x12replicates_to_drop\x94]\x94\x8c\x18plate11-AGTCCTATCCTCAAAT\x94as\x8c\x19A/SouthAfrica/R16462/2021\x94}\x94\x8c\x12replicates_to_drop\x94]\x94\x8c\x17plate2-CTAGCAGATTGTATAA\x94asu\x8c\x07M099d30\x94}\x94(\x8c\x13A/Chester/5355/2022\x94}\x94\x8c\x12replicates_to_drop\x94]\x94\x8c\x18plate11-CCTCAAAATAACAAGC\x94as\x8c\x12A/Michigan/45/2015\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x12A/Paris/31196/2021\x94}\x94\x8c\tignore_qc\x94\x88su\x8c\x07Y044d30\x94}\x94(\x8c\x16A/Bangladesh/8036/2021\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x12A/Brisbane/48/2022\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x1aA/India-PUN-NIV328484/2021\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x0eA/Perth/1/2022\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x14A/Washington/23/2020\x94}\x94\x8c\tignore_qc\x94\x88suuu\x8c\x04rule\x94\x8c\x0eprocess_counts\x94\x8c\x0fbench_iteration\x94N\x8c\tscriptdir\x94\x8ct/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks\x94ub.'); from snakemake.logging import logger; logger.printshellcmds = False; import os; os.chdir(r'/fh/fast/bloom_j/computational_notebooks/jbloom/2023/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/test_example');
-######## snakemake preamble end #########
-
import altair as alt
-
-import pandas as pd
-
-_ = alt.data_transformers.disable_max_rows()
-
Get the variables passed by snakemake:
# Unpack the inputs, outputs, params and wildcards that snakemake passes in.
count_csvs = snakemake.input.count_csvs
fate_csvs = snakemake.input.fate_csvs
viral_library_csv = snakemake.input.viral_library_csv
neut_standard_set_csv = snakemake.input.neut_standard_set_csv
frac_infectivity_csv = snakemake.output.frac_infectivity_csv
qc_failures_file = snakemake.output.qc_failures
samples = snakemake.params.samples
plate_params = snakemake.params.plate_params
plate = snakemake.wildcards.plate

# data frame built from the plate's `samples` entry
samples_df = pd.DataFrame(plate_params["samples"])

# The sample names are paired with the count and fate CSVs by position, so all
# of these must have the same length. Raise an informative error (as is done
# for `wells_to_drop` below) rather than relying on a bare `assert`.
input_lengths = {
    "samples": len(samples),
    "count_csvs": len(count_csvs),
    "fate_csvs": len(fate_csvs),
    "samples_df": len(samples_df),
}
if len(set(input_lengths.values())) != 1:
    raise ValueError(
        f"inconsistent numbers of samples and input files: {input_lengths}"
    )

print(f"Processing {plate=}")

# define set of QC failures for this plate
qc_failures = set()

# get and print QC thresholds
qc_thresholds = plate_params["process_counts_qc_thresholds"]
display(pd.Series(qc_thresholds))

# will drop the following samples from `wells_to_drop`
wells_to_drop = plate_params["wells_to_drop"]
if wells_to_drop:
    # every well named for dropping must actually exist on the plate
    if not set(wells_to_drop).issubset(samples_df["well"]):
        raise ValueError(f"{wells_to_drop=} not all in `samples_df`")
    print("Dropping the following wells (samples):")
    display(samples_df.query("well in @wells_to_drop"))
    samples_df = samples_df.query("well not in @wells_to_drop")
-
Processing plate='plate11' --
avg_barcode_counts 250.0000 -min_neut_standard_frac 0.0050 -max_neut_standard_frac_no_serum 0.1000 -barcode_frac_consistency 4.0000 -min_viral_barcode_frac 0.0005 -min_neut_standard_barcode_frac 0.0050 -min_neut_standard_count 350.0000 -min_no_serum_viral_barcode_count 5.0000 -min_dilutions_per_serum_replicate 4.0000 -max_frac_infectivity 8.0000 -dtype: float64-
Dropping the following wells (samples): --
- | well | -serum | -dilution_factor | -replicate | -fastq | -serum_replicate | -sample_noplate | -sample | -plate | -plate_replicate | -
---|---|---|---|---|---|---|---|---|---|---|
23 | -C12 | -none | -NaN | -3 | -fastqs/Plate11_Noserum11_S91_R1_001.fastq.gz | -none-3 | -none-3 | -plate11_none-3 | -plate11 | -plate11-3 | -
29 | -D6 | -Y044d30 | -1620.0 | -2 | -fastqs/Y044_d30_conc5_S44_R1_001.fastq.gz | -Y044d30 | -Y044d30_1620 | -plate11_Y044d30_1620 | -plate11 | -plate11 | -
Statistics on barcode-parsing for each sample¶
Make an interactive chart of the "fates" of the sequencing reads parsed for each sample on the plate.
If most sequencing reads are not "valid barcodes", this could potentially indicate some problem in the sequencing or barcode set you are parsing.
Potential fates are:
- valid barcode: barcode that matches a known virus or neutralization standard, we hope most reads are this.
- invalid barcode: a barcode with proper flanking sequences, but does not match a known virus or neutralization standard. If you have a lot of reads of this type, it is probably a good idea to look at the invalid barcode CSVs (in the `./results/barcode_invalid/` subdirectory created by the pipeline) to see what these invalid barcodes are.
- unparseable barcode: could not parse a barcode from this read as there was not a sequence of the correct length with the appropriate flanking sequence.
- low quality barcode: low-quality or `N` nucleotides in barcode, could indicate problem with sequencing.
- failed chastity filter: reads that failed the Illumina chastity filter, if these are reported in the FASTQ (they may not be).
Also, if the number of reads per sample is very uneven, that could indicate that you did not do a good job of balancing the different samples in the Illumina sequencing.
# Read the per-sample read-fate counts and attach the sample metadata. The
# merge uses pandas' default inner join, so samples no longer in `samples_df`
# (the dropped wells) are removed here.
fates = (
    pd.concat([pd.read_csv(f).assign(sample=s) for f, s in zip(fate_csvs, samples)])
    .merge(samples_df, validate="many_to_one", on="sample")
    .assign(fate_counts=lambda x: x.groupby("fate")["count"].transform("sum"))
    .query("fate_counts > 0")[  # only keep fates with at least one count
        ["fate", "count", "well", "serum", "sample_noplate", "dilution_factor"]
    ]
)

assert len(fates) == len(fates.drop_duplicates())

# sorted serum names, computed once and used for both the dropdown options and
# their labels
fates_serum_options = sorted(fates["serum"].unique().tolist())

# dropdown to show a single serum (or all of them); shared by later charts
serum_selection = alt.selection_point(
    fields=["serum"],
    bind=alt.binding_select(
        options=[None] + fates_serum_options,
        labels=["all"] + [str(s) for s in fates_serum_options],
        name="serum",
    ),
)

# y-axis order: samples sorted by serum and then dilution factor. `fates` has
# one row per sample and fate, so de-duplicate (keeping first-appearance order)
# to pass each sample just once in the explicit sort order.
fates_sample_order = (
    fates.sort_values(["serum", "dilution_factor"])["sample_noplate"]
    .unique()
    .tolist()
)

fates_chart = (
    alt.Chart(fates)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .encode(
        alt.X("count", scale=alt.Scale(nice=False, padding=3)),
        alt.Y("sample_noplate", title=None, sort=fates_sample_order),
        alt.Color("fate", sort=sorted(fates["fate"].unique(), reverse=True)),
        alt.Order("fate", sort="descending"),
        tooltip=fates.columns.tolist(),
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(10),
        width=200,
        title=f"Barcode parsing for {plate}",
    )
    .configure_axis(grid=False)
)

fates_chart
-
Counts per barcode¶
First get the counts per barcode and classification information on these barcodes:
# Barcode counts: one table per sample, each tagged with its sample name, then
# stacked and annotated with the sample metadata (minus unneeded columns).
per_sample_counts = [
    pd.read_csv(csv_file).assign(sample=sample_name)
    for csv_file, sample_name in zip(count_csvs, samples)
]
counts = pd.concat(per_sample_counts)
counts = counts.merge(samples_df, validate="many_to_one", on="sample")
counts = counts.drop(columns=["replicate", "plate", "fastq"])

# Classify every barcode as belonging to the viral library or to the
# neutralization standard set (which has no strain).
viral_barcode_class = pd.read_csv(viral_library_csv)[["barcode", "strain"]]
viral_barcode_class = viral_barcode_class.assign(neut_standard=False)
neut_standard_barcode_class = pd.read_csv(neut_standard_set_csv)[["barcode"]]
neut_standard_barcode_class = neut_standard_barcode_class.assign(
    neut_standard=True,
    strain=pd.NA,
)
barcode_class = pd.concat(
    [viral_barcode_class, neut_standard_barcode_class],
    ignore_index=True,
)

# The counted barcodes must be exactly the classified ones before the
# classification is merged onto the counts.
assert set(counts["barcode"]) == set(barcode_class["barcode"])
counts = counts.merge(barcode_class, on="barcode", validate="many_to_one")
-
Drop any barcodes that are specified to drop:
# Barcodes the plate configuration says to exclude from the analysis.
barcodes_to_drop = plate_params["barcodes_to_drop"]

# Test truthiness rather than `len(...)`, the same way `wells_to_drop` is
# handled: an empty list and a missing (`None`) entry both mean nothing to drop.
if barcodes_to_drop:
    print(
        "The following barcodes are specified to drop:\n\t"
        + "\n\t".join(barcodes_to_drop)
    )
    # refuse to silently ignore barcodes that are not in the count data
    invalid_barcodes = set(barcodes_to_drop) - set(counts["barcode"])
    if invalid_barcodes:
        raise ValueError(f"Barcodes to drop do not exist: {invalid_barcodes}")
    counts = counts.query("barcode not in @barcodes_to_drop")

else:
    print("No barcodes specified to drop.")
-
The following barcodes are specified to drop: - GGTCCATCTCAGATCG --
Plot average counts per barcode, and make sure that these pass the QC threshold.
If a sample has inadequate barcode counts, it may not have good enough statistics for accurate analysis:
# Mean count per barcode for each sample; `dropna=False` keeps samples whose
# grouping columns contain missing values.
avg_barcode_counts = counts.groupby(
    ["well", "serum", "dilution_factor", "sample_noplate"],
    dropna=False,
    as_index=False,
).aggregate(avg_count=pd.NamedAgg("count", "mean"))

# flag whether each sample reaches the `avg_barcode_counts` QC threshold
avg_barcode_counts["passes_qc"] = (
    avg_barcode_counts["avg_count"] >= qc_thresholds["avg_barcode_counts"]
)

# encoding channels for the bar chart, built separately for readability
avg_counts_x = alt.X(
    "avg_count",
    title="average counts per barcode",
    scale=alt.Scale(nice=False, padding=3),
)
avg_counts_y = alt.Y(
    "sample_noplate",
    title=None,
    # order samples by serum and then dilution factor
    sort=list(
        avg_barcode_counts.sort_values(["serum", "dilution_factor"])["sample_noplate"]
    ),
)
avg_counts_color = alt.Color(
    "passes_qc",
    title=f"passes QC threshold {qc_thresholds['avg_barcode_counts']}",
    scale=alt.Scale(domain=[True, False]),
)
# float columns are shown with 3 significant digits in the tooltip
avg_counts_tooltip = [
    alt.Tooltip(col, format=".3g") if avg_barcode_counts[col].dtype == float else col
    for col in avg_barcode_counts.columns
]

avg_barcode_counts_chart = (
    alt.Chart(avg_barcode_counts)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .encode(
        avg_counts_x,
        avg_counts_y,
        avg_counts_color,
        tooltip=avg_counts_tooltip,
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Average barcode counts for {plate}",
    )
    .configure_axis(grid=False)
)

display(avg_barcode_counts_chart)

# record a QC failure and list the offending samples if any sample falls short
if avg_barcode_counts["passes_qc"].all():
    print(f"\nAll samples passed {qc_thresholds['avg_barcode_counts']=}")
else:
    qc_failures.add("avg_barcode_counts")
    print(f"\nThe following samples failed {qc_thresholds['avg_barcode_counts']=}")
    display(avg_barcode_counts.query("not passes_qc").reset_index(drop=True))
-
-All samples passed qc_thresholds['avg_barcode_counts']=250 --
Fraction of counts from neutralization standard¶
Determine the fraction of counts from the neutralization standard in each sample, and make sure this fraction passes the QC threshold.
# Total and neutralization-standard counts for each sample. A barcode's count
# is multiplied by 1 if it is a neut standard and by 0 otherwise, so summing
# that product gives the neut-standard count.
neut_standard_fracs = (
    counts.assign(
        neut_standard_count=lambda c: c["count"] * c["neut_standard"].astype(int)
    )
    .groupby(
        ["well", "serum", "dilution_factor", "sample_noplate"],
        dropna=False,
        as_index=False,
    )
    .aggregate(
        total_count=pd.NamedAgg("count", "sum"),
        neut_standard_count=pd.NamedAgg("neut_standard_count", "sum"),
    )
)
neut_standard_fracs["neut_standard_frac"] = (
    neut_standard_fracs["neut_standard_count"] / neut_standard_fracs["total_count"]
)

# A sample passes QC if its fraction is at least the minimum and, for no-serum
# samples only, also at most the no-serum maximum.
neut_frac_at_least_min = (
    neut_standard_fracs["neut_standard_frac"]
    >= qc_thresholds["min_neut_standard_frac"]
)
neut_frac_has_serum = neut_standard_fracs["serum"] != "none"
neut_frac_at_most_max = (
    neut_standard_fracs["neut_standard_frac"]
    <= qc_thresholds["max_neut_standard_frac_no_serum"]
)
neut_standard_fracs["passes_qc"] = neut_frac_at_least_min & (
    neut_frac_has_serum | neut_frac_at_most_max
)

# description of the QC criteria, used in the legend title and printed messages
neut_standard_qc_desc = (
    f"neut standard frac >= {qc_thresholds['min_neut_standard_frac']}, "
    f"<= {qc_thresholds['max_neut_standard_frac_no_serum']} for no-serum samples"
)

# encoding channels for the bar chart, built separately for readability
neut_frac_x = alt.X(
    "neut_standard_frac",
    title="fraction of counts from neutralization standard",
    scale=alt.Scale(nice=False, padding=3),
)
neut_frac_y = alt.Y(
    "sample_noplate",
    title=None,
    # order samples by serum and then dilution factor
    sort=list(
        neut_standard_fracs.sort_values(["serum", "dilution_factor"])[
            "sample_noplate"
        ]
    ),
)
neut_frac_color = alt.Color(
    "passes_qc",
    title=neut_standard_qc_desc,
    scale=alt.Scale(domain=[True, False]),
)
# float columns are shown with 3 significant digits in the tooltip
neut_frac_tooltip = [
    alt.Tooltip(col, format=".3g") if neut_standard_fracs[col].dtype == float else col
    for col in neut_standard_fracs.columns
]

neut_standard_fracs_chart = (
    alt.Chart(neut_standard_fracs)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .encode(
        neut_frac_x,
        neut_frac_y,
        neut_frac_color,
        tooltip=neut_frac_tooltip,
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Neutralization-standard fractions for {plate}",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(neut_standard_fracs_chart)

# record a QC failure and list the offending samples if any sample fails
if neut_standard_fracs["passes_qc"].all():
    print(f"\nAll samples passed {neut_standard_qc_desc}")
else:
    qc_failures.add("min_neut_standard_frac or max_neut_standard_frac_no_serum")
    print(f"\nThe following samples failed {neut_standard_qc_desc}")
    display(neut_standard_fracs.query("not passes_qc").reset_index(drop=True))
-
-All samples passed neut standard frac >= 0.005, <= 0.1 for no-serum samples --
Consistency and minimum fractions for barcodes¶
We examine the fraction of counts attributable to each barcode. We do this by splitting the data two ways:
--
-
Looking at all viral (but not neut-standard) barcodes only for the no-serum samples.
-
-Looking at just the neut-standard barcodes for all samples.
-
-
The reason is that if the experiment is set up perfectly, these fractions should be the same across all samples for each barcode. -(We do not expect viral barcodes to have consistent fractions across samples that contain serum, as they will be neutralized differently depending on strain).
-We plot these fractions in interactive plots (you can mouseover points and zoom) so you can identify barcodes that fail the expected consistency QC thresholds.
-We also make sure the barcodes meet specified QC minimum thresholds for all samples, and flag any that do not.
# Mouseover selection on `barcode`, used to highlight points in the charts below.
barcode_selection = alt.selection_point(fields=["barcode"], on="mouseover", empty=False)

# Y-axis order shared by both charts: samples sorted by serum, then dilution.
evenness_sample_order = list(
    neut_standard_fracs.sort_values(["serum", "dilution_factor"])["sample_noplate"]
)

# One pass per value of the `neut_standard` flag: neut-standard barcodes are
# examined in all samples, all other barcodes only in the no-serum samples.
for is_neut_standard, df in counts.groupby("neut_standard"):
    if is_neut_standard:
        evenness_x_title = "barcode's fraction of neut standard counts"
        evenness_chart_title = f"{plate} all samples, neut-standard barcodes"
        extra_dropped_col = "strain"
    else:
        df = df.query("serum == 'none'")
        evenness_x_title = "barcode's fraction of non-neut standard counts"
        evenness_chart_title = f"{plate} no-serum samples, all barcodes"
        extra_dropped_col = "dilution_factor"

    # Each barcode's share of its sample's counts (within this subset), and how
    # far that share sits from the barcode's median share across samples.
    df = df.assign(
        sample_counts=lambda x: x.groupby("sample")["count"].transform("sum"),
        count_frac=lambda x: x["count"] / x["sample_counts"],
        median_count_frac=lambda x: (
            x.groupby("barcode")["count_frac"].transform("median")
        ),
        fold_change_from_median=lambda x: x["count_frac"] / x["median_count_frac"],
    ).drop(
        columns=[
            "sample",
            "serum_replicate",
            "sample_counts",
            "neut_standard",
            extra_dropped_col,
        ]
    )

    # Float columns get a compact numeric format in the tooltip.
    evenness_tooltips = [
        alt.Tooltip(col, format=".3g") if df[col].dtype == float else col
        for col in df.columns
    ]

    evenness_chart = (
        alt.Chart(df)
        .add_params(barcode_selection)
        .mark_circle(fillOpacity=0.6, stroke="black", strokeOpacity=1)
        .encode(
            x=alt.X(
                "count_frac",
                title=evenness_x_title,
                scale=alt.Scale(nice=False, padding=5),
            ),
            y=alt.Y("sample_noplate", title=None, sort=evenness_sample_order),
            fill=alt.Fill("barcode", legend=None),
            strokeWidth=alt.condition(barcode_selection, alt.value(2), alt.value(0)),
            size=alt.condition(barcode_selection, alt.value(60), alt.value(35)),
            tooltip=evenness_tooltips,
        )
        .properties(height=alt.Step(10), width=300, title=evenness_chart_title)
        .configure_axis(grid=False)
        .configure_legend(titleLimit=1000)
        .interactive()
    )

    if is_neut_standard:
        # Only the all-samples chart is filtered by the serum selection.
        evenness_chart = evenness_chart.add_params(serum_selection).transform_filter(
            serum_selection
        )
        print(f"\n\n{'=' * 89}\nAnalyzing neut-standard barcodes from all samples\n")
    else:
        print(f"\n\n{'=' * 89}\nAnalyzing all barcodes from no-serum samples\n")

    display(evenness_chart)

    # Consistency: flag rows whose fold change from the median reaches the
    # threshold in either direction.
    fold_change = df["fold_change_from_median"]
    excess_fold_change = df[
        (fold_change <= 1 / qc_thresholds["barcode_frac_consistency"])
        | (fold_change >= qc_thresholds["barcode_frac_consistency"])
    ]
    if len(excess_fold_change) == 0:
        print(f"\nPassed {qc_thresholds['barcode_frac_consistency']=}")
    else:
        print(
            f"\nFollowing barcodes failed {qc_thresholds['barcode_frac_consistency']=}"
        )
        display(excess_fold_change)
        qc_failures.add("barcode_frac_consistency")

    # Minimum fraction: the threshold depends on which subset is being analyzed.
    if is_neut_standard:
        insufficient_neut_standard_barcode_frac = df[
            df["count_frac"] < qc_thresholds["min_neut_standard_barcode_frac"]
        ]
        if len(insufficient_neut_standard_barcode_frac) == 0:
            print(f"\nPassed {qc_thresholds['min_neut_standard_barcode_frac']=}")
        else:
            print(
                "\nFollowing barcodes fail "
                f"{qc_thresholds['min_neut_standard_barcode_frac']=}"
            )
            display(insufficient_neut_standard_barcode_frac)
            qc_failures.add("min_neut_standard_barcode_frac")
    else:
        insufficient_viral_barcode_frac = df[
            df["count_frac"] < qc_thresholds["min_viral_barcode_frac"]
        ]
        if len(insufficient_viral_barcode_frac) == 0:
            print(f"\nPassed {qc_thresholds['min_viral_barcode_frac']=}")
        else:
            print(
                f"\nFollowing barcodes fail {qc_thresholds['min_viral_barcode_frac']=}"
            )
            display(insufficient_viral_barcode_frac)
            qc_failures.add("min_viral_barcode_frac")
-
- -========================================================================================= -Analyzing all barcodes from no-serum samples - --
-Passed qc_thresholds['barcode_frac_consistency']=4 - -Passed qc_thresholds['min_viral_barcode_frac']=0.0005 - - -========================================================================================= -Analyzing neut-standard barcodes from all samples - --
-Passed qc_thresholds['barcode_frac_consistency']=4 - -Passed qc_thresholds['min_neut_standard_barcode_frac']=0.005 --
Compute fraction infectivity¶
The fraction infectivity for viral barcode $v_b$ in sample $s$ is computed as: -$$ -F_{v_b,s} = \frac{c_{v_b,s} / \left(\sum_{n_b} c_{n_b,s}\right)}{{\rm median}_{s_0}\left[ c_{v_b,s_0} / \left(\sum_{n_b} c_{n_b,s_0}\right)\right]} -$$ -where
--
-
- $c_{v_b,s}$ is the counts of viral barcode $v_b$ in sample $s$. -
- $\sum_{n_b} c_{n_b,s}$ is the sum of the counts for all neutralization standard barcodes $n_b$ for sample $s$. -
- $c_{v_b,s_0}$ is the counts of viral barcode $v_b$ in no-serum sample $s_0$. -
- $\sum_{n_b} c_{n_b,s_0}$ is the sum of the counts for all neutralization standard barcodes $n_b$ for no-serum sample $s_0$. -
- ${\rm median}_{s_0}\left[ c_{v_b,s_0} / \left(\sum_{n_b} c_{n_b,s_0}\right)\right]$ is the median taken across all no-serum samples of the counts of viral barcode $v_b$ versus the total counts for all neutralization standard barcodes. -
First, compute the total neutralization-standard counts for each sample. -Plot these, and make sure they meet the QC threshold.
# Total neutralization-standard reads per sample, flagged against the
# configured minimum count.
neut_standard_counts = (
    counts.query("neut_standard")
    .groupby(
        ["well", "serum", "sample_noplate", "dilution_factor"],
        dropna=False,
        as_index=False,
    )
    .aggregate(neut_standard_count=pd.NamedAgg(column="count", aggfunc="sum"))
    .assign(
        passes_qc=lambda x: (
            x["neut_standard_count"] >= qc_thresholds["min_neut_standard_count"]
        )
    )
)

# Y-axis order: samples sorted by serum, then dilution factor.
counts_sample_order = list(
    neut_standard_counts.sort_values(["serum", "dilution_factor"])["sample_noplate"]
)

# Float columns get a compact numeric format in the tooltip.
counts_tooltips = [
    alt.Tooltip(col, format=".3g") if neut_standard_counts[col].dtype == float else col
    for col in neut_standard_counts.columns
]

neut_standard_counts_chart = (
    alt.Chart(neut_standard_counts)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .mark_bar(height={"band": 0.85})
    .encode(
        x=alt.X(
            "neut_standard_count",
            title="counts from neutralization standard",
            scale=alt.Scale(nice=False, padding=3),
        ),
        y=alt.Y("sample_noplate", title=None, sort=counts_sample_order),
        color=alt.Color(
            "passes_qc",
            title=f"at least {qc_thresholds['min_neut_standard_count']} counts",
            scale=alt.Scale(domain=[True, False]),
        ),
        tooltip=counts_tooltips,
    )
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Neutralization-standard counts for {plate}",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(neut_standard_counts_chart)

# Report failing samples and record the QC failure, if there are any.
if not neut_standard_counts["passes_qc"].all():
    print(f"\nSamples failing {qc_thresholds['min_neut_standard_count']=}")
    display(neut_standard_counts.query("not passes_qc"))
    qc_failures.add("min_neut_standard_count")
else:
    print(f"\nAll samples pass {qc_thresholds['min_neut_standard_count']=}")
-
-All samples pass qc_thresholds['min_neut_standard_count']=350 --
Compute and plot the no-serum sample viral barcode counts and check if they pass the QC filters.
# Columns retained after joining the per-sample neut-standard totals.
no_serum_columns = [
    "barcode",
    "strain",
    "well",
    "sample_noplate",
    "count",
    "neut_standard_count",
]

# Non-neut-standard barcode counts in the no-serum samples, each row paired
# with its sample's neut-standard total and flagged against the minimum count.
no_serum_counts = (
    counts.query("serum == 'none'")
    .query("not neut_standard")
    .merge(neut_standard_counts, validate="many_to_one")
    .loc[:, no_serum_columns]
    .assign(
        passes_qc=lambda x: (
            x["count"] >= qc_thresholds["min_no_serum_viral_barcode_count"]
        )
    )
)

# One point per barcode and sample; mouseover highlights a barcode.
no_serum_counts_chart = (
    alt.Chart(no_serum_counts)
    .add_params(barcode_selection)
    .mark_circle(fillOpacity=0.6, stroke="black", strokeOpacity=1)
    .encode(
        x=alt.X(
            "count",
            title="viral barcode count",
            scale=alt.Scale(nice=False, padding=5),
        ),
        y=alt.Y("sample_noplate", title=None),
        fill=alt.Fill("barcode", legend=None),
        strokeWidth=alt.condition(barcode_selection, alt.value(2), alt.value(0)),
        size=alt.condition(barcode_selection, alt.value(60), alt.value(35)),
        tooltip=list(no_serum_counts.columns),
    )
    .properties(
        height=alt.Step(10),
        width=300,
        title=f"{plate} viral barcode counts in no-serum samples",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
    .interactive()
)

display(no_serum_counts_chart)

# Report failing barcode-samples and record the QC failure, if there are any.
if not no_serum_counts["passes_qc"].all():
    print(f"\nSamples failing {qc_thresholds['min_no_serum_viral_barcode_count']=}")
    display(no_serum_counts.query("not passes_qc"))
    qc_failures.add("min_no_serum_viral_barcode_count")
else:
    print(f"\nAll samples pass {qc_thresholds['min_no_serum_viral_barcode_count']=}")
-
-All samples pass qc_thresholds['min_no_serum_viral_barcode_count']=5 --
Compute and plot the median ratio of viral barcode count to neut standard counts across no-serum samples:
# For each barcode (and its strain), the median across no-serum samples of
# viral-barcode count divided by the sample's neut-standard count.
median_no_serum_ratio = (
    no_serum_counts.assign(ratio=lambda x: x["count"] / x["neut_standard_count"])
    .groupby(["barcode", "strain"], as_index=False)
    .aggregate(median_no_serum_ratio=pd.NamedAgg(column="ratio", aggfunc="median"))
)

# Mouseover selection on `strain`, used to highlight bars in the chart below.
strain_selection = alt.selection_point(fields=["strain"], on="mouseover", empty=False)

# Float columns get a compact numeric format in the tooltip.
median_ratio_tooltips = [
    alt.Tooltip(col, format=".3g") if median_no_serum_ratio[col].dtype == float else col
    for col in median_no_serum_ratio.columns
]

median_no_serum_ratio_chart = (
    alt.Chart(median_no_serum_ratio)
    .add_params(strain_selection)
    .mark_bar(height={"band": 0.85})
    .encode(
        x=alt.X(
            "median_no_serum_ratio",
            title="median ratio of counts",
            scale=alt.Scale(nice=False, padding=5),
        ),
        # Bars are ordered from largest to smallest median ratio.
        y=alt.Y(
            "barcode",
            sort=alt.SortField("median_no_serum_ratio", order="descending"),
            axis=alt.Axis(labelFontSize=5),
        ),
        color=alt.condition(strain_selection, alt.value("orange"), alt.value("gray")),
        tooltip=median_ratio_tooltips,
    )
    .properties(
        height=alt.Step(5),
        width=250,
        title=f"{plate} no-serum median ratio viral barcode to neut-standard barcode",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(median_no_serum_ratio_chart)
-
Compute the actual fraction infectivities, QC check if any are null (from zero counts), and also plot and check if any exceed the max_frac_infectivity
:
# Columns kept in the final fraction-infectivity table, in output order.
frac_infectivity_columns = [
    "barcode",
    "strain",
    "serum",
    "serum_replicate",
    "plate_replicate",
    "dilution_factor",
    "frac_infectivity",
    "sample_noplate",
    "well",
    "passes_qc",
]

# Fraction infectivity for each non-neut-standard barcode in each serum sample:
# (count / sample neut-standard count) divided by the barcode's median
# no-serum ratio.
frac_infectivity = (
    counts.query("not neut_standard")
    .query("serum != 'none'")
    .merge(median_no_serum_ratio, validate="many_to_one")
    .merge(neut_standard_counts.drop(columns="passes_qc"), validate="many_to_one")
    .assign(
        frac_infectivity=lambda x: (
            (x["count"] / x["neut_standard_count"]) / x["median_no_serum_ratio"]
        )
    )
    .assign(
        passes_qc=lambda x: (
            x["frac_infectivity"] <= qc_thresholds["max_frac_infectivity"]
        )
    )
    .loc[:, frac_infectivity_columns]
)

# Sanity checks: one row per barcode / serum / replicate / dilution group, and
# no missing dilution factors.
frac_infectivity_groups = frac_infectivity.groupby(
    ["barcode", "serum", "plate_replicate", "dilution_factor"]
)
assert len(frac_infectivity_groups) == len(frac_infectivity)
assert frac_infectivity["dilution_factor"].notnull().all()

# Y-axis order: samples sorted by serum, then dilution factor.
frac_infectivity_sample_order = list(
    neut_standard_counts.sort_values(["serum", "dilution_factor"])["sample_noplate"]
)

# Float columns get a compact numeric format in the tooltip.
frac_infectivity_tooltips = [
    alt.Tooltip(col, format=".3g") if frac_infectivity[col].dtype == float else col
    for col in frac_infectivity.columns
]

frac_infectivity_chart = (
    alt.Chart(frac_infectivity)
    .add_params(serum_selection, barcode_selection)
    .transform_filter(serum_selection)
    .mark_circle(stroke="black", strokeOpacity=1)
    .encode(
        x=alt.X(
            "frac_infectivity",
            title="fraction infectivity",
            scale=alt.Scale(nice=False, padding=3),
        ),
        y=alt.Y("sample_noplate", title=None, sort=frac_infectivity_sample_order),
        strokeWidth=alt.condition(barcode_selection, alt.value(2), alt.value(0)),
        size=alt.condition(barcode_selection, alt.value(60), alt.value(35)),
        color=alt.Color(
            "passes_qc",
            title=f"frac_infectivity <= {qc_thresholds['max_frac_infectivity']}",
            scale=alt.Scale(domain=[True, False]),
        ),
        tooltip=frac_infectivity_tooltips,
    )
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Fraction infectivities for {plate}",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(frac_infectivity_chart)

# QC: every fraction infectivity must be within the configured maximum.
if frac_infectivity["passes_qc"].all():
    print(f"\nAll barcode-samples pass {qc_thresholds['max_frac_infectivity']=}")
else:
    print(f"\nSome barcode-samples fail {qc_thresholds['max_frac_infectivity']=}")
    display(frac_infectivity.query("not passes_qc"))
    qc_failures.add("max_frac_infectivity")

# QC: no fraction infectivity may be null.
if not frac_infectivity["frac_infectivity"].isnull().any():
    print("\nNo undefined fraction infectivities")
else:
    print("\nSome barcodes have undefined fraction infectivity due to zero counts:")
    display(frac_infectivity.query("frac_infectivity.isnull()"))
    qc_failures.add("null_frac_infectivity")
-
-All barcode-samples pass qc_thresholds['max_frac_infectivity']=8 - -No undefined fraction infectivities --
Write fraction infectivities to file:
print(f"\nWriting fraction infectivities to {frac_infectivity_csv}")

# Only these columns are written; QC and plotting helpers are left out.
frac_infectivity_csv_columns = [
    "barcode",
    "strain",
    "serum",
    "plate_replicate",
    "dilution_factor",
    "frac_infectivity",
]

# Rows are ordered by serum, replicate, dilution and barcode; floats are
# written with five significant digits.
frac_infectivity_to_write = frac_infectivity[frac_infectivity_csv_columns].sort_values(
    ["serum", "plate_replicate", "dilution_factor", "barcode"]
)
frac_infectivity_to_write.to_csv(
    frac_infectivity_csv, index=False, float_format="%.5g"
)
-
-Writing fraction infectivities to results/plates/plate11/frac_infectivity.csv --
Make sure we have enough dilutions with non-null fraction infectivities for each serum-replicate:
# Count, for every serum replicate, the distinct dilution factors that have a
# non-null fraction infectivity.
n_dilutions = (
    frac_infectivity.query("frac_infectivity.notnull()")
    .groupby("serum_replicate")
    .aggregate(n_dilutions=pd.NamedAgg("dilution_factor", "nunique"))
    .assign(
        # The threshold is a *minimum*: a replicate with exactly that many
        # dilutions satisfies it, so only strictly fewer dilutions fail. This
        # matches the other minimum thresholds in this notebook, which pass on
        # `>=` and fail on `<`.
        fails_qc=lambda x: (
            x["n_dilutions"] < qc_thresholds["min_dilutions_per_serum_replicate"]
        ),
    )
)

# Record a QC failure (and show the offending replicates) if any fall short.
if n_dilutions["fails_qc"].any():
    print(f"Failing {qc_thresholds['min_dilutions_per_serum_replicate']=}:")
    display(n_dilutions.query("fails_qc"))
    qc_failures.add("min_dilutions_per_serum_replicate")
else:
    print(f"Passed {qc_thresholds['min_dilutions_per_serum_replicate']=}:")
-
Passed qc_thresholds['min_dilutions_per_serum_replicate']=4: --
Summarize all QC failures and write to file:
# Collapse the collected failure names into newline-separated text, in sorted
# order. Note this rebinds `qc_failures` from the collection to a string.
qc_failures = "\n".join(sorted(qc_failures))

# An empty string means nothing failed.
if not qc_failures:
    print("No QC failures")
else:
    print(f"Encountered the following QC failures:\n{qc_failures}")

# The log file is always written, even when it ends up empty.
print(f"\nLogging QC failures to {qc_failures_file}")
with open(qc_failures_file, "w") as f:
    f.write(qc_failures)
-
No QC failures - -Logging QC failures to results/plates/plate11/process_counts_qc_failures.txt --
-