diff --git a/.dockstore.yml b/.dockstore.yml index bda39fc51..12a5ed2a2 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -50,6 +50,11 @@ workflows: primaryDescriptorPath: /pipes/WDL/workflows/augur_from_msa.wdl testParameterFiles: - empty.json + - name: augur_from_msa_with_subsampler + subclass: WDL + primaryDescriptorPath: /pipes/WDL/workflows/augur_from_msa_with_subsampler.wdl + testParameterFiles: + - empty.json - name: bams_multiqc subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/bams_multiqc.wdl @@ -324,6 +329,11 @@ workflows: primaryDescriptorPath: /pipes/WDL/workflows/scaffold_and_refine.wdl testParameterFiles: - empty.json + - name: subsample_by_casecounts + subclass: WDL + primaryDescriptorPath: /pipes/WDL/workflows/subsample_by_casecounts.wdl + testParameterFiles: + - empty.json - name: subsample_by_metadata subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/subsample_by_metadata.wdl @@ -358,4 +368,4 @@ workflows: subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/bam_to_qiime.wdl testParameterFiles: - - empty.json \ No newline at end of file + - empty.json diff --git a/pipes/WDL/tasks/tasks_interhost.wdl b/pipes/WDL/tasks/tasks_interhost.wdl index 14598f19c..e19f29557 100644 --- a/pipes/WDL/tasks/tasks_interhost.wdl +++ b/pipes/WDL/tasks/tasks_interhost.wdl @@ -1,5 +1,156 @@ version 1.0 +task subsample_by_cases { + meta { + description: "Run subsampler to get downsampled dataset and metadata proportional to epidemiological case counts." + } + input { + File metadata + File case_data + + String id_column + String geo_column + String date_column = "date" + String unit = "week" + + File? keep_file + File? remove_file + File? filter_file + Float baseline = 0.0001 + Int? seed_num + String? start_date + String? end_date + + String docker = "quay.io/broadinstitute/subsampler" + Int machine_mem_gb = 30 + } + command <<< + set -e -o pipefail + mkdir -p data outputs + + # decompress if compressed + echo "staging and decompressing input data files" + if [[ ~{metadata} == *.gz ]]; then + cat "~{metadata}" | pigz -d > data/metadata.tsv + elif [[ ~{metadata} == *.zst ]]; then + cat "~{metadata}" | zstd -d > data/metadata.tsv + else + ln -s "~{metadata}" data/metadata.tsv + fi + if [[ ~{case_data} == *.gz ]]; then + cat "~{case_data}" | pigz -d > data/case_data.tsv + elif [[ ~{case_data} == *.zst ]]; then + cat "~{case_data}" | zstd -d > data/case_data.tsv + else + ln -s "~{case_data}" data/case_data.tsv + fi + + ## replicate snakemake DAG manually + # rule genome_matrix + # Generate matrix of genome counts per day, for each element in column ~{geo_column} + echo "getting genome matrix" + python3 /opt/subsampler/scripts/get_genome_matrix.py \ + --metadata data/metadata.tsv \ + --index-column ~{geo_column} \ + --date-column ~{date_column} \ + ~{"--start-date " + start_date} \ + ~{"--end-date " + end_date} \ + --output outputs/genome_matrix_days.tsv + date;uptime;free + + # rule unit_conversion + # Generate matrix of genome and case counts per epiweek + echo "converting matricies to epiweeks" + python3 /opt/subsampler/scripts/aggregator.py \ + --input outputs/genome_matrix_days.tsv \ + --unit ~{unit} \ + --format integer \ + --output outputs/matrix_genomes_unit.tsv + python3 /opt/subsampler/scripts/aggregator.py \ + --input data/case_data.tsv \ + --unit ~{unit} \ + --format integer \ + ~{"--start-date " + start_date} \ + ~{"--end-date " + end_date} \ + --output outputs/matrix_cases_unit.tsv + date;uptime;free + + # rule correct_bias + # Correct under- and oversampling 
genome counts based on epidemiological data + echo "create bias-correction matrix" + python3 /opt/subsampler/scripts/correct_bias.py \ + --genome-matrix outputs/matrix_genomes_unit.tsv \ + --case-matrix outputs/matrix_cases_unit.tsv \ + --index-column code \ + ~{"--baseline " + baseline} \ + --output1 outputs/weekly_sampling_proportions.tsv \ + --output2 outputs/weekly_sampling_bias.tsv \ + --output3 outputs/matrix_genomes_unit_corrected.tsv + date;uptime;free + + # rule subsample + # Sample genomes and metadata according to the corrected genome matrix + echo "subsample data according to bias-correction" + # subsampler_timeseries says --keep is optional but actually fails if you don't specify one + cp /dev/null data/keep.txt + ~{"cp " + keep_file + " data/keep.txt"} + python3 /opt/subsampler/scripts/subsampler_timeseries.py \ + --metadata data/metadata.tsv \ + --genome-matrix outputs/matrix_genomes_unit_corrected.tsv \ + --index-column ~{id_column} \ + --geo-column ~{geo_column} \ + --date-column ~{date_column} \ + --time-unit ~{unit} \ + --keep data/keep.txt \ + ~{"--remove " + remove_file} \ + ~{"--filter-file " + filter_file} \ + ~{"--seed " + seed_num} \ + ~{"--start-date " + start_date} \ + ~{"--end-date " + end_date} \ + --weekasdate no \ + --sampled-sequences outputs/selected_sequences.txt \ + --sampled-metadata outputs/selected_metadata.tsv \ + --report outputs/sampling_stats.txt + echo '# Sampling proportion: ~{baseline}' | cat - outputs/sampling_stats.txt > temp && mv temp outputs/sampling_stats.txt + date;uptime;free + + # copy outputs from container's temp dir to host-accessible working dir for delocalization + echo "wrap up" + mv outputs/* . + # get counts + cat selected_sequences.txt | wc -l | tee NUM_OUT + # get hardware utilization + set +o pipefail + cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC + cat /proc/loadavg > CPU_LOAD + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + + >>> + runtime { + docker: docker + memory: machine_mem_gb + " GB" + cpu: 2 + disks: "local-disk 200 HDD" + disk: "200 GB" + dx_instance_type: "mem3_ssd1_v2_x4" + } + output { + File genome_matrix_days = "genome_matrix_days.tsv" + File matrix_genomes_unit = "matrix_genomes_unit.tsv" + File matrix_cases_unit = "matrix_cases_unit.tsv" + File weekly_sampling_proportions = "weekly_sampling_proportions.tsv" + File weekly_sampling_bias = "weekly_sampling_bias.tsv" + File matrix_genomes_unit_corrected = "matrix_genomes_unit_corrected.tsv" + File selected_sequences = "selected_sequences.txt" + File selected_metadata = "selected_metadata.tsv" + File sampling_stats = "sampling_stats.txt" + Int num_selected = read_int("NUM_OUT") + Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) + Int runtime_sec = ceil(read_float("UPTIME_SEC")) + String cpu_load = read_string("CPU_LOAD") + } +} + task multi_align_mafft_ref { input { File reference_fasta diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 8fb09ecd8..5053c77e6 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -450,7 +450,7 @@ task nextstrain_build_subsample { File? 
drop_list Int machine_mem_gb = 50 - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a" Int disk_size = 750 } @@ -594,7 +594,7 @@ task nextstrain_build_subsample { task nextstrain_ncov_defaults { input { String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a" - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 50 } command { @@ -632,7 +632,7 @@ task nextstrain_deduplicate_sequences { Boolean error_on_seq_diff = false String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a" - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 750 } @@ -686,7 +686,7 @@ task nextstrain_ncov_sanitize_gisaid_data { String? prefix_to_strip String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a" - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 750 } @@ -762,7 +762,7 @@ task filter_subsample_sequences { Array[String]? exclude_where Array[String]? include_where - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 750 } parameter_meta { @@ -846,14 +846,14 @@ task filter_sequences_to_list { File sequences Array[File]? keep_list - String out_fname = sub(sub(basename(sequences), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") - String docker = "nextstrain/base:build-20211012T204409Z" + String out_fname = sub(sub(basename(sequences, ".zst"), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") + String docker = "quay.io/broadinstitute/viral-core:2.1.33" # "nextstrain/base:build-20211012T204409Z" Int disk_size = 750 } parameter_meta { sequences: { description: "Set of sequences (unaligned fasta or aligned fasta -- one sequence per genome) or variants (vcf format) to subsample using augur filter.", - patterns: ["*.fasta", "*.fa", "*.vcf", "*.vcf.gz"] + patterns: ["*.fasta", "*.fa", "*.fasta.zst", "*.vcf", "*.vcf.gz"] } keep_list: { description: "List of strain ids.", @@ -876,13 +876,14 @@ task filter_sequences_to_list { echo filtering fasta file python3 < VERSION - AUGUR_RECURSION_LIMIT=10000 augur tree --alignment "~{msa_or_vcf}" \ + AUGUR_RECURSION_LIMIT=100000 augur tree --alignment "~{msa_or_vcf}" \ --output "~{out_basename}_~{method}.nwk" \ --method "~{method}" \ --substitution-model ~{default="GTR" substitution_model} \ @@ -1344,7 +1345,7 @@ task refine_augur_tree { String? divergence_units = "mutations" File? vcf_reference - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 750 Int machine_mem_gb = 75 } @@ -1358,7 +1359,7 @@ task refine_augur_tree { command <<< set -e augur version > VERSION - AUGUR_RECURSION_LIMIT=10000 augur refine \ + AUGUR_RECURSION_LIMIT=100000 augur refine \ --tree "~{raw_tree}" \ --alignment "~{msa_or_vcf}" \ --metadata "~{metadata}" \ @@ -1418,14 +1419,14 @@ task ancestral_traits { Float? 
sampling_bias_correction Int machine_mem_gb = 32 - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 750 } String out_basename = basename(tree, '.nwk') command <<< set -e augur version > VERSION - AUGUR_RECURSION_LIMIT=10000 augur traits \ + AUGUR_RECURSION_LIMIT=100000 augur traits \ --tree "~{tree}" \ --metadata "~{metadata}" \ --columns ~{sep=" " columns} \ @@ -1471,7 +1472,7 @@ task ancestral_tree { File? vcf_reference File? output_vcf - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 300 } parameter_meta { @@ -1484,7 +1485,7 @@ task ancestral_tree { command <<< set -e augur version > VERSION - AUGUR_RECURSION_LIMIT=10000 augur ancestral \ + AUGUR_RECURSION_LIMIT=100000 augur ancestral \ --tree "~{tree}" \ --alignment "~{msa_or_vcf}" \ --output-node-data "~{out_basename}_nt_muts.json" \ @@ -1532,14 +1533,14 @@ task translate_augur_tree { File? vcf_reference_output File? vcf_reference - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 300 } String out_basename = basename(tree, '.nwk') command <<< set -e augur version > VERSION - AUGUR_RECURSION_LIMIT=10000 augur translate --tree "~{tree}" \ + AUGUR_RECURSION_LIMIT=500000 augur translate --tree "~{tree}" \ --ancestral-sequences "~{nt_muts}" \ --reference-sequence "~{genbank_gb}" \ ~{"--vcf-reference-output " + vcf_reference_output} \ @@ -1589,14 +1590,14 @@ task tip_frequencies { Boolean include_internal_nodes = false Int machine_mem_gb = 64 - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" String out_basename = basename(tree, '.nwk') Int disk_size = 200 } command <<< set -e augur version > VERSION - AUGUR_RECURSION_LIMIT=10000 augur frequencies \ + AUGUR_RECURSION_LIMIT=100000 augur frequencies \ --method "~{method}" \ --tree "~{tree}" \ --metadata "~{metadata}" \ @@ -1645,20 +1646,20 @@ task assign_clades_to_nodes { input { File tree_nwk File nt_muts_json - File aa_muts_json + File? aa_muts_json File ref_fasta File clades_tsv - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 300 } String out_basename = basename(basename(tree_nwk, ".nwk"), "_timetree") command <<< set -e augur version > VERSION - AUGUR_RECURSION_LIMIT=10000 augur clades \ + AUGUR_RECURSION_LIMIT=100000 augur clades \ --tree "~{tree_nwk}" \ - --mutations "~{nt_muts_json}" "~{aa_muts_json}" \ + --mutations "~{nt_muts_json}" ~{'"' + aa_muts_json + '"'} \ --reference "~{ref_fasta}" \ --clades "~{clades_tsv}" \ --output-node-data "~{out_basename}_clades.json" @@ -1694,14 +1695,14 @@ task augur_import_beast { String? 
tip_date_delimiter Int machine_mem_gb = 3 - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 150 } String tree_basename = basename(beast_mcc_tree, ".tree") command <<< set -e augur version > VERSION - AUGUR_RECURSION_LIMIT=10000 augur import beast \ + AUGUR_RECURSION_LIMIT=100000 augur import beast \ --mcc "~{beast_mcc_tree}" \ --output-tree "~{tree_basename}.nwk" \ --output-node-data "~{tree_basename}.json" \ @@ -1755,7 +1756,7 @@ task export_auspice_json { String out_basename = basename(basename(tree, ".nwk"), "_timetree") Int machine_mem_gb = 64 - String docker = "nextstrain/base:build-20211012T204409Z" + String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 300 } @@ -1803,7 +1804,7 @@ task export_auspice_json { echo --auspice-config >> exportargs echo "~{auspice_config}" >> exportargs - (export AUGUR_RECURSION_LIMIT=15000; cat exportargs | grep . | tr '\n' '\0' | xargs -0 -t augur export v2 \ + (export AUGUR_RECURSION_LIMIT=100000; cat exportargs | grep . | tr '\n' '\0' | xargs -0 -t augur export v2 \ ~{"--metadata " + sample_metadata} \ ~{"--lat-longs " + lat_longs_tsv} \ ~{"--colors " + colors_tsv} \ diff --git a/pipes/WDL/workflows/augur_from_msa_with_subsampler.wdl b/pipes/WDL/workflows/augur_from_msa_with_subsampler.wdl new file mode 100644 index 000000000..e10fda486 --- /dev/null +++ b/pipes/WDL/workflows/augur_from_msa_with_subsampler.wdl @@ -0,0 +1,171 @@ +version 1.0 + +import "../tasks/tasks_interhost.wdl" as interhost +import "../tasks/tasks_nextstrain.wdl" as nextstrain +import "../tasks/tasks_reports.wdl" as reports +import "../tasks/tasks_utils.wdl" as utils + +workflow augur_from_msa_with_subsampler { + meta { + description: "Build trees, and convert to json representation suitable for Nextstrain visualization. See https://nextstrain.org/docs/getting-started/ and https://nextstrain-augur.readthedocs.io/en/stable/" + author: "Broad Viral Genomics" + email: "viral-ngs@broadinstitute.org" + allowNestedInputs: true + } + + input { + File aligned_msa_fasta + Array[File]+ sample_metadata + File? ref_fasta + File? genbank_gb + File auspice_config + File? clades_tsv + Array[String]? ancestral_traits_to_infer + File? mask_bed + } + + parameter_meta { + aligned_msa_fasta: { + description: "Multiple sequence alignment (aligned fasta).", + patterns: ["*.fasta", "*.fa", "*.fasta.gz", "*.fa.gz", "*.fasta.zst", "*.fa.zst"] + } + sample_metadata: { + description: "Metadata in tab-separated text format. See https://nextstrain-augur.readthedocs.io/en/stable/faq/metadata.html for details. At least one tab file must be provided--if multiple are provided, they will be joined via a full left outer join using the 'strain' column as the join ID.", + patterns: ["*.txt", "*.tsv", "*.txt.gz", "*.txt.zst", "*.tsv.gz", "*.tsv.zst"] + } + ref_fasta: { + description: "A reference assembly (not included in assembly_fastas) to align assembly_fastas against. Typically from NCBI RefSeq or similar.", + patterns: ["*.fasta", "*.fa"] + } + genbank_gb: { + description: "A 'genbank' formatted gene annotation file that is used to calculate coding consequences of observed mutations. Must correspond to the same coordinate space as ref_fasta. 
Typically downloaded from the same NCBI accession number as ref_fasta.", + patterns: ["*.gb", "*.gbf"] + } + ancestral_traits_to_infer: { + description: "A list of metadata traits to use for ancestral node inference (see https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/traits.html). Multiple traits may be specified; must correspond exactly to column headers in metadata file. Omitting these values will skip ancestral trait inference, and ancestral nodes will not have estimated values for metadata." + } + auspice_config: { + description: "A file specifying options to customize the auspice export; see: https://nextstrain.github.io/auspice/customise-client/introduction", + patterns: ["*.json", "*.txt"] + } + clades_tsv: { + description: "A TSV file containing clade mutation positions in four columns: [clade gene site alt]; see: https://nextstrain.org/docs/tutorials/defining-clades", + patterns: ["*.tsv", "*.txt"] + } + mask_bed: { + description: "Optional list of sites to mask when building trees.", + patterns: ["*.bed"] + } + } + + # merge tsvs if necessary + if(length(sample_metadata)>1) { + call utils.tsv_join { + input: + input_tsvs = sample_metadata, + id_col = 'strain', + out_basename = "metadata-merged", + out_suffix = ".txt.zst" + } + } + + # subsample and filter genomic data based on epi case data + call interhost.subsample_by_cases { + input: + metadata = select_first(flatten([[tsv_join.out_tsv], sample_metadata])) + } + call nextstrain.filter_sequences_to_list { + input: + sequences = aligned_msa_fasta, + keep_list = [subsample_by_cases.selected_sequences] + } + + # standard augur pipeline + if(defined(mask_bed)) { + call nextstrain.augur_mask_sites { + input: + sequences = filter_sequences_to_list.filtered_fasta, + mask_bed = mask_bed + } + } + File masked_sequences = select_first([augur_mask_sites.masked_sequences, filter_sequences_to_list.filtered_fasta]) + call nextstrain.draft_augur_tree { + input: + msa_or_vcf = masked_sequences + } + call nextstrain.refine_augur_tree { + input: + raw_tree = draft_augur_tree.aligned_tree, + msa_or_vcf = masked_sequences, + metadata = subsample_by_cases.selected_metadata + } + if(defined(ancestral_traits_to_infer) && length(select_first([ancestral_traits_to_infer,[]]))>0) { + call nextstrain.ancestral_traits { + input: + tree = refine_augur_tree.tree_refined, + metadata = subsample_by_cases.selected_metadata, + columns = select_first([ancestral_traits_to_infer,[]]) + } + } + call nextstrain.tip_frequencies { + input: + tree = refine_augur_tree.tree_refined, + metadata = subsample_by_cases.selected_metadata + } + call nextstrain.ancestral_tree { + input: + tree = refine_augur_tree.tree_refined, + msa_or_vcf = masked_sequences + } + if(defined(genbank_gb)) { + call nextstrain.translate_augur_tree { + input: + tree = refine_augur_tree.tree_refined, + nt_muts = ancestral_tree.nt_muts_json, + genbank_gb = select_first([genbank_gb]) + } + } + if(defined(clades_tsv) && defined(ref_fasta)) { + call nextstrain.assign_clades_to_nodes { + input: + tree_nwk = refine_augur_tree.tree_refined, + nt_muts_json = ancestral_tree.nt_muts_json, + aa_muts_json = translate_augur_tree.aa_muts_json, + ref_fasta = select_first([ref_fasta]), + clades_tsv = select_first([clades_tsv]) + } + } + call nextstrain.export_auspice_json { + input: + tree = refine_augur_tree.tree_refined, + sample_metadata = subsample_by_cases.selected_metadata, + node_data_jsons = select_all([ + refine_augur_tree.branch_lengths, + ancestral_traits.node_data_json, + 
ancestral_tree.nt_muts_json, + translate_augur_tree.aa_muts_json, + assign_clades_to_nodes.node_clade_data_json]), + auspice_config = auspice_config + } + + output { + File selected_metadata = subsample_by_cases.selected_metadata + File sampling_stats_file = subsample_by_cases.sampling_stats + + File masked_subsampled_msa = masked_sequences + + File ml_tree = draft_augur_tree.aligned_tree + File time_tree = refine_augur_tree.tree_refined + + Array[File] node_data_jsons = select_all([ + refine_augur_tree.branch_lengths, + ancestral_traits.node_data_json, + ancestral_tree.nt_muts_json, + translate_augur_tree.aa_muts_json, + assign_clades_to_nodes.node_clade_data_json]) + + File auspice_input_json = export_auspice_json.virus_json + File tip_frequencies_json = tip_frequencies.node_data_json + File root_sequence_json = export_auspice_json.root_sequence_json + } +} \ No newline at end of file diff --git a/pipes/WDL/workflows/sarscov2_nextstrain.wdl b/pipes/WDL/workflows/sarscov2_nextstrain.wdl index 7e35e18ee..b0cbabc98 100644 --- a/pipes/WDL/workflows/sarscov2_nextstrain.wdl +++ b/pipes/WDL/workflows/sarscov2_nextstrain.wdl @@ -15,7 +15,7 @@ workflow sarscov2_nextstrain { } input { - Array[File]+ assembly_fastas=["gs://nextstrain-data/files/ncov/open/sequences.fasta.xz"] + Array[File]+ assembly_fastas=["gs://nextstrain-data/files/ncov/open/sequences.fasta.zst"] Array[File]+ sample_metadata_tsvs=["gs://nextstrain-data/files/ncov/open/metadata.tsv.gz"] File? ref_fasta Int min_unambig_genome = 27000 @@ -57,7 +57,7 @@ workflow sarscov2_nextstrain { call utils.zcat { input: infiles = assembly_fastas, - output_name = "all_samples_combined_assembly.fasta" + output_name = "all_samples_combined_assembly.fasta.zst" } call nextstrain.nextstrain_deduplicate_sequences as dedup_seqs { @@ -215,4 +215,4 @@ workflow sarscov2_nextstrain { File root_sequence_json = export_auspice_json.root_sequence_json File auspice_input_json = export_auspice_json.virus_json } -} \ No newline at end of file +} diff --git a/pipes/WDL/workflows/sarscov2_nextstrain_aligned_input.wdl b/pipes/WDL/workflows/sarscov2_nextstrain_aligned_input.wdl index e6f822985..52b0da5fe 100644 --- a/pipes/WDL/workflows/sarscov2_nextstrain_aligned_input.wdl +++ b/pipes/WDL/workflows/sarscov2_nextstrain_aligned_input.wdl @@ -57,7 +57,7 @@ workflow sarscov2_nextstrain_aligned_input { call utils.zcat { input: infiles = aligned_sequences_fasta, - output_name = "all_samples_combined_assembly.fasta.xz" + output_name = "all_samples_combined_assembly.fasta.zst" } #### merge metadata, compute derived cols @@ -200,4 +200,4 @@ workflow sarscov2_nextstrain_aligned_input { File root_sequence_json = export_auspice_json.root_sequence_json File auspice_input_json = export_auspice_json.virus_json } -} \ No newline at end of file +} diff --git a/pipes/WDL/workflows/subsample_by_casecounts.wdl b/pipes/WDL/workflows/subsample_by_casecounts.wdl new file mode 100644 index 000000000..75ee2b397 --- /dev/null +++ b/pipes/WDL/workflows/subsample_by_casecounts.wdl @@ -0,0 +1,20 @@ +version 1.0 + +import "../tasks/tasks_interhost.wdl" as interhost + +workflow subsampler_only { + + call interhost.subsample_by_cases + + output { + File genome_matrix_days_file = subsample_by_cases.genome_matrix_days + File matrix_genomes_unit_file = subsample_by_cases.matrix_genomes_unit + File matrix_cases_unit_file = subsample_by_cases.matrix_cases_unit + File weekly_sampling_proportions_file = subsample_by_cases.weekly_sampling_proportions + File weekly_sampling_bias_file = 
subsample_by_cases.weekly_sampling_bias + File matrix_genomes_unit_corrected_file = subsample_by_cases.matrix_genomes_unit_corrected + File selected_sequences_file = subsample_by_cases.selected_sequences + File selected_metadata_file = subsample_by_cases.selected_metadata + File sampling_stats_file = subsample_by_cases.sampling_stats + } +} diff --git a/requirements-modules.txt b/requirements-modules.txt index b20c89a98..4d112e34f 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -5,7 +5,7 @@ broadinstitute/viral-phylo=2.1.20.2 broadinstitute/py3-bio=0.1.2 broadinstitute/beast-beagle-cuda=1.10.5pre broadinstitute/ncbi-tools=2.10.7.10 -nextstrain/base=build-20211012T204409Z +nextstrain/base=build-20230905T192825Z andersenlabapps/ivar=1.3.1 quay.io/staphb/pangolin=4.3.1-pdata-1.22 nextstrain/nextclade=2.12.0
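
For anyone trying out the new subsample_by_casecounts.wdl entry point, below is a minimal sketch of a Cromwell/Terra-style inputs JSON. Note that the workflow block inside that file is named subsampler_only, so call-level inputs are addressed under that prefix. This example is illustrative only and not part of the patch: the bucket paths and column values are placeholders (assumptions), id_column and geo_column must match the headers of your own metadata and case-count tables, and the remaining task inputs (date_column, unit, baseline, keep_file, remove_file, filter_file, seed_num, start_date, end_date) either have defaults or are optional in tasks_interhost.wdl.

{
  "subsampler_only.subsample_by_cases.metadata": "gs://YOUR-BUCKET/metadata.tsv.gz",
  "subsampler_only.subsample_by_cases.case_data": "gs://YOUR-BUCKET/case_counts.tsv",
  "subsampler_only.subsample_by_cases.id_column": "strain",
  "subsampler_only.subsample_by_cases.geo_column": "code"
}

One caveat worth noting: the correct_bias step in the task invokes correct_bias.py with a hardcoded --index-column code, so in practice the geography column in the case-count table (and likely the value passed as geo_column) needs to be named "code" for the matrices to line up — that is an inference from the command as written, not something verified here.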