-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.yaml
50 lines (43 loc) · 2.71 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
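# Configuration for annotating influenza H5 HA sequences (from a pre-built
# UShER tree) with deep mutational scanning (DMS) phenotypes.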
# Pre-built UShER tree and its version file, both hosted under:
# https://hgdownload.gi.ucsc.edu/hubs/GCF/000/864/105/GCF_000864105.1/UShER_NC_007362.1/
# NOTE(review): `ref` and `ref_cds_coords` are placed under `usher_prebuilt`
# here; confirm this nesting against the consumer of this config.
usher_prebuilt:
  tree: 'https://hgdownload.gi.ucsc.edu/hubs/GCF/000/864/105/GCF_000864105.1/UShER_NC_007362.1/fluA.GCF_000864105.1.NC_007362.1.latest.pb.gz'
  version_info: 'https://hgdownload.gi.ucsc.edu/hubs/GCF/000/864/105/GCF_000864105.1/UShER_NC_007362.1/fluA.GCF_000864105.1.NC_007362.1.latest.version.txt'
  # accession of the reference (same accession that appears in the URLs above)
  ref: 'NC_007362.1'
  # presumably [start, end] of the coding sequence within `ref` -- TODO confirm
  # whether these are 0- or 1-based and whether the end is inclusive
  ref_cds_coords: [22, 1728]
# Deep mutational scanning (DMS) data; all files below live in
# https://github.com/dms-vep/Flu_H5_American-Wigeon_South-Carolina_2021-H5N1_DMS
dms:
  prot: 'https://raw.githubusercontent.com/dms-vep/Flu_H5_American-Wigeon_South-Carolina_2021-H5N1_DMS/main/results/gene_sequence/protein.fasta'
  site_numbering_map: 'https://raw.githubusercontent.com/dms-vep/Flu_H5_American-Wigeon_South-Carolina_2021-H5N1_DMS/main/data/site_numbering_map.csv'
  # Numbering schemes: each key is the name to use, each value is the
  # corresponding column in `site_numbering_map`.
  site_numbering_schemes:
    sequential: 'sequential'
    mature_H3: 'reference'
    mature_H5: 'mature_H5'
  phenotypes: 'https://raw.githubusercontent.com/dms-vep/Flu_H5_American-Wigeon_South-Carolina_2021-H5N1_DMS/main/results/summaries/phenotypes.csv'
  escape_by_species: 'https://raw.githubusercontent.com/dms-vep/Flu_H5_American-Wigeon_South-Carolina_2021-H5N1_DMS/main/results/summaries/phenotypes_per_antibody_escape.csv'
# For DMS phenotype values, keep this many digits to the right of the
# decimal point.
dms_decimal_scale: 3
# For the scatter plots of phenotypes: the phenotypes initially selected for
# the x- and y-axes.
# The axis keys are quoted because a bare `y` is a boolean in the YAML 1.1
# type specification; quoting guarantees both keys load as the strings
# "x" and "y" on every parser.
pheno_scatter_init_phenos:
  'x': ferret_sera_escape_increase
  'y': frac_divergence
# Filters for analyses: for each named analysis, keep just the strains that
# fulfil that analysis's filter. Filter fields:
#   frac_aligned:    require >= this fraction of residues to be aligned (not gapped)
#   frac_divergence: require divergence among aligned residues to be <= this
#   strain_regex:    require strain to contain a match for this regex
analysis_filters:
  cattle:
    frac_aligned: 0.98
    frac_divergence: 0.1
    # A single leading (?i) makes the whole pattern case-insensitive, so the
    # `dairy_cow` alternative is still matched without regard to case.
    # A second (?i) after `|` is redundant, and Python's `re` (>= 3.11)
    # rejects global inline flags that are not at the start of the pattern.
    # NOTE(review): assumes this regex is evaluated by Python's `re` -- confirm.
    strain_regex: '(?i)cattle(?!_egret)|dairy_cow'
  max-diff-0.04:
    frac_aligned: 0.98
    frac_divergence: 0.04
  max-diff-0.08:
    frac_aligned: 0.98
    frac_divergence: 0.08
  max-diff-0.12:
    frac_aligned: 0.98
    frac_divergence: 0.12
# Title and description text for this analysis.
title: 'Influenza H5 HA annotated by deep mutational scanning phenotypes'
# Literal block scalar: the value is the single line below plus a trailing newline.
description: |
  See https://github.com/jbloomlab/Flu-HA-H5-2.3.4.4-DMS-informed-surveillance for details.