diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index c344b50a..32bdb8dd 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -95,7 +95,8 @@ rule prepare_auspice_config: ], "display_defaults": { "map_triplicate": True, - "color_by": params.replace_clade_key + "color_by": params.replace_clade_key, + "tip_label": "strain" }, "filters": [ "country", @@ -110,6 +111,7 @@ rule prepare_auspice_config: ], "metadata_columns": [ "accession", + "strain", "url" ] } @@ -150,7 +152,7 @@ rule export: auspice_config = "results/defaults/{gene}/auspice_config_{serotype}.json", colors = "results/colors_{serotype}.tsv", output: - auspice_json = "results/{gene}/raw_dengue_{serotype}.json" + auspice_json = "results/{gene}/dengue_{serotype}.json" params: strain_id = config.get("strain_id_field", "strain"), shell: @@ -167,25 +169,6 @@ rule export: --output {output.auspice_json} """ -rule final_strain_name: - input: - auspice_json="results/{gene}/raw_dengue_{serotype}.json", - metadata="data/metadata_{serotype}.tsv", - output: - auspice_json="auspice/dengue_{serotype}_{gene}.json" - params: - strain_id=config.get("strain_id_field", "strain"), - display_strain_field=config.get("display_strain_field", "strain"), - shell: - """ - python3 scripts/set_final_strain_name.py \ - --metadata {input.metadata} \ - --metadata-id-columns {params.strain_id} \ - --input-auspice-json {input.auspice_json} \ - --display-strain-name {params.display_strain_field} \ - --output {output.auspice_json} - """ - rule tip_frequencies: """ Estimating KDE frequencies for tips @@ -196,7 +179,7 @@ rule tip_frequencies: output: tip_freq = "auspice/dengue_{serotype}_{gene}_tip-frequencies.json" params: - strain_id = config["display_strain_field"], + strain_id = config["strain_id_field"], min_date = config["tip_frequencies"]["min_date"], max_date = config["tip_frequencies"]["max_date"], narrow_bandwidth = config["tip_frequencies"]["narrow_bandwidth"], diff --git a/phylogenetic/scripts/set_final_strain_name.py b/phylogenetic/scripts/set_final_strain_name.py deleted file mode 100755 index 08ca9359..00000000 --- a/phylogenetic/scripts/set_final_strain_name.py +++ /dev/null @@ -1,38 +0,0 @@ -import pandas as pd -import json, argparse -from augur.io import read_metadata - -def replace_name_recursive(node, lookup): - if node["name"] in lookup: - node["name"] = lookup[node["name"]] - - if "children" in node: - for child in node["children"]: - replace_name_recursive(child, lookup) - -if __name__=="__main__": - parser = argparse.ArgumentParser( - description="Swaps out the strain names in the Auspice JSON with the final strain name", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json") - parser.add_argument('--metadata', type=str, required=True, help="input data") - parser.add_argument('--metadata-id-columns', nargs="+", help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.") - parser.add_argument('--display-strain-name', type=str, required=True, help="field to use as strain name in auspice") - parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON") - args = parser.parse_args() - - metadata = read_metadata(args.metadata, id_columns=args.metadata_id_columns) - name_lookup = {} - for ri, row in metadata.iterrows(): - strain_id = row.name - name_lookup[strain_id] = args.display_strain_name if pd.isna(row[args.display_strain_name]) else row[args.display_strain_name] - - with open(args.input_auspice_json, 'r') as fh: - data = json.load(fh) - - replace_name_recursive(data['tree'], name_lookup) - - with open(args.output, 'w') as fh: - json.dump(data, fh)