diff --git a/1-generate-sv-calls/manta.nf b/1-generate-sv-calls/manta.nf
new file mode 100644
index 0000000..c470fc2
--- /dev/null
+++ b/1-generate-sv-calls/manta.nf
@@ -0,0 +1,78 @@
+#!/usr/bin/env nextflow
+
+params.reference = 'ref.fa'
+
+reference_file = file(params.reference)
+
+Channel
+    .fromSRA(file('../library_list.txt').readLines())
+    .set{reads}
+
+process bwa_index {
+    publishDir 'bwa_index'
+    module 'bwa/bwa-0.7.17'
+
+    input:
+    file reference from reference_file
+
+    output:
+    file "${reference}.*" into reference_index
+
+    """ bwa index ${reference} """
+}
+
+process samtools_faidx {
+    module 'samtools/samtools-1.9'
+
+    input:
+    file reference from reference_file
+
+    output:
+    file "${reference}.fai" into faidx
+
+    """ samtools faidx ${reference} """
+}
+
+process align {
+    cpus 16
+    module 'bwa/bwa-0.7.17:samtools/samtools-1.9'
+    publishDir 'alignments'
+
+    input:
+    file ref from reference_file
+    file index from reference_index
+    set accession, file(both_ends) from reads
+
+    output:
+    file "${accession}.bam" into aligned
+    file "${accession}.bam.bai" into aligned_index
+
+    """
+    bwa mem -t ${task.cpus} ${ref} ${both_ends} | samtools view -bh - | \
+        samtools fixmate -m - - | samtools sort - | \
+        samtools markdup -r - ${accession}.bam
+    samtools index ${accession}.bam
+    """
+}
+
+process manta {
+    cpus 16
+    module 'biocompute/biocompute-modules'
+    module 'manta/manta-1.6.0'
+    publishDir 'results'
+
+    input:
+    file 'ref.fa' from reference_file
+    file 'ref.fa.fai' from faidx
+    file bams from aligned.collect()
+    file bais from aligned_index.collect()
+
+    output:
+    file "manta*" into results
+
+    """
+    bams=""; for bam in ${bams}; do bams+="--bam \$bam "; done
+    configManta.py \$bams --referenceFasta ref.fa --runDir manta
+    manta/runWorkflow.py -j ${task.cpus}
+    """
+}
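Note: the manta workflow above calls all samples jointly, staging every sorted BAM into a single configManta.py run via aligned.collect(). A minimal sketch of how it might be launched, assuming Nextflow plus the module-managed bwa/samtools/manta installs named in the process blocks; the executor and cluster configuration are site-specific assumptions, not part of this diff:

    cd 1-generate-sv-calls   # so that ../library_list.txt resolves to the repo root
    nextflow run manta.nf --reference /path/to/ref.fa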
diff --git a/1-generate-sv-calls/smoove.nf b/1-generate-sv-calls/smoove.nf
new file mode 100644
index 0000000..c63e969
--- /dev/null
+++ b/1-generate-sv-calls/smoove.nf
@@ -0,0 +1,137 @@
+#!/usr/bin/env nextflow
+
+params.reference = 'ref.fa'
+params.scratch = '/local/scratch/esrbhb'
+
+Channel
+    .fromSRA(file('../library_list.txt').readLines())
+    .set{reads}
+
+reference_file = file(params.reference)
+
+process bwa_index {
+    publishDir 'bwa_index'
+    module 'bwa/bwa-0.7.17'
+
+    input:
+    file reference from reference_file
+
+    output:
+    file "${reference}.*" into reference_index
+
+    """ bwa index ${reference} """
+}
+
+process samtools_faidx {
+    module 'samtools/samtools-1.9'
+
+    input:
+    file reference from reference_file
+
+    output:
+    file "${reference}.fai" into faidx
+
+    """ samtools faidx ${reference} """
+}
+
+process align {
+    cpus 16
+    module 'bwa/bwa-0.7.17:samtools/samtools-1.9'
+    publishDir 'alignments'
+
+    input:
+    file ref from reference_file
+    file index from reference_index
+    set accession, file(both_ends) from reads
+
+    output:
+    set accession, "${accession}.bam*" into aligned
+
+    """
+    bwa mem -R "@RG\\tID:${accession}\\tSM:${accession}\\tPL:ILLUMINA" \
+        -t ${task.cpus} ${ref} ${both_ends} | samtools view -bh - | \
+        samtools fixmate -m - - | samtools sort - | \
+        samtools markdup -r - ${accession}.bam
+    samtools index ${accession}.bam
+    """
+}
+
+aligned.into { aligned_for_smoove_call; aligned_for_smoove_genotype }
+faidx.into { faidx_for_smoove_call; faidx_for_smoove_merge;
+    faidx_for_smoove_genotype }
+
+process smoove_call {
+    container 'brentp/smoove:v0.2.3'
+    publishDir 'unmerged'
+    cpus 8
+
+    input:
+    file 'ref.fa' from reference_file
+    file 'ref.fa.fai' from faidx_for_smoove_call
+    set accession, file(bam) from aligned_for_smoove_call
+
+    output:
+    file "${accession}-smoove.genotyped.vcf.gz" into unmerged
+
+    """
+    smoove call --name ${accession} --fasta ref.fa -p ${task.cpus} \
+        --genotype ${accession}.bam
+    """
+}
+
+process smoove_merge {
+    container 'brentp/smoove:v0.2.3'
+
+    input:
+    file 'ref.fa' from reference_file
+    file 'ref.fa.fai' from faidx_for_smoove_merge
+    file all_unmerged from unmerged.collect()
+
+    output:
+    file "merged.sites.vcf.gz" into merged
+
+    """
+    smoove merge --name merged -f ref.fa ${all_unmerged}
+    """
+}
+
+merged_vcf_faidx_and_bams = merged.combine(faidx_for_smoove_genotype)
+    .combine(aligned_for_smoove_genotype)
+
+process smoove_genotype {
+    container 'brentp/smoove:v0.2.3'
+
+    input:
+    file 'ref.fa' from reference_file
+    set 'merged.sites.vcf.gz', 'ref.fa.fai', accession,
+        file(bam) from merged_vcf_faidx_and_bams
+
+    output:
+    file "${accession}-joint-smoove.genotyped.vcf.gz" into joint_genotyped
+    file "${accession}-joint-smoove.genotyped.vcf.gz.csi" into j_g_index
+
+    """
+    export TMPDIR=\$PWD
+    smoove genotype -d -x --name ${accession}-joint --fasta ref.fa \
+        --vcf merged.sites.vcf.gz ${accession}.bam
+    echo "done!"
+    """
+}
+
+process smoove_paste {
+    container 'brentp/smoove:v0.2.3'
+    publishDir 'output'
+
+    input:
+    file all_vcfs from joint_genotyped.collect()
+    file all_indexes from j_g_index.collect()
+
+    output:
+    file "pasted.smoove.square.vcf.gz" into pasted
+
+    """
+    smoove paste --name pasted ${all_vcfs}
+    """
+}
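Note: unlike manta.nf, the smoove processes declare container 'brentp/smoove:v0.2.3', so Nextflow needs a container engine enabled at launch. A sketch assuming Docker is available (Singularity via -with-singularity works the same way); again, site configuration is an assumption:

    cd 1-generate-sv-calls
    nextflow run smoove.nf --reference /path/to/ref.fa -with-docker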
+ """ + db_path = gff_path + ".db" + if gff_path.split(".")[-1] == "db": + return gffutils.FeatureDB(gff_path) + elif os.path.exists(db_path): + return gffutils.FeatureDB(db_path) + else: + print("Creating gff db...", file=sys.stderr) + return gffutils.create_db( + gff_path, + db_path, + # the id_spec is necessary because NCBI gff's do not follow + # the GFF specification + id_spec={"gene": "db_xref"}, + ) + + +def parse_args(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "-r", + "--regulatory-margin", + type=int, + default=2000, + help="distance up/downstream of a gene to consider " + "as affecting that gene, in bp [2000]", + ) + parser.add_argument( + "vcf", help="the vcf file to annotate", type=lambda f: vcf.Reader(filename=f) + ) + parser.add_argument( + "gff", + help="annotations of the reference genome, " + "either in NCBI gff or a pre-created sqlite db", + type=gff_type, + ) + return parser.parse_args() + + +def get_deletion_effects(deletion_record, gff_db, regulatory_margin=2000): + """ + Figure out the effects of a deletion using a gff. + + Args: + deletion_record (vcf.Record): a vcf Record representing the + deletion + gff_db (gffutils.FeatureDB): a gffutils DB of a genome's + annotations + regulatory_margin (int): the amount of sequence on either side + of the deletion to look in for genes to be classified as + having their regulatory regions affected + """ + affected_genes = set() + intergenic = True + regulatory, intronic, coding = [False] * 3 + + # first, go through all the features that overlap the deletion + # and use them to set the above booleans and add any affected + # genes to affected_genes + features_in_deletion = gff_db.region( + seqid=deletion_record.CHROM, + start=deletion_record.POS, + end=deletion_record.sv_end, + ) + for feature in features_in_deletion: + if feature.featuretype == "gene": + affected_genes.add(feature.attributes["Name"][0].upper()) + intergenic = False + intronic = True + elif feature.featuretype == "CDS": + coding = True + intronic = False + + # next, look for any genes *near* the deletion + features_near_deletion = itertools.chain( + gff_db.region( + seqid=deletion_record.CHROM, + start=deletion_record.POS - regulatory_margin, + end=deletion_record.POS, + ), + gff_db.region( + seqid=deletion_record.CHROM, + start=deletion_record.sv_end, + end=deletion_record.sv_end + regulatory_margin, + ), + ) + for feature in features_near_deletion: + if feature.featuretype == "gene": + gene_name = feature.attributes["Name"][0].upper() + # only consider this a deletion of a regulatory region if + # this gene has not been otherwise affected + if gene_name not in affected_genes: + regulatory = True + intergenic = False + affected_genes.add(gene_name) + + return affected_genes, intergenic, regulatory, intronic, coding + + +def annotate_deletion(record, affected_genes, intergenic, regulatory, intronic, coding): + """ adds INFO fields to a vcf record """ + record.INFO["affected_genes"] = list(affected_genes) + # all these if statements are necessary because pyvcf adds + # unnecessary semicolons for false flags and empty fields + if intergenic: + record.INFO["intergenic"] = True + if regulatory: + record.INFO["regulatory"] = True + if intronic: + record.INFO["intronic"] = True + if coding: + record.INFO["coding"] = True + return record + + +def add_info_fields_to_header(vcf_reader): + """ + PyVCF uses a vcf.Reader to get a header and output it when a + vcf.Writer is created. 
diff --git a/3-analyze-sv-calls/annotate_deletions.py b/3-analyze-sv-calls/annotate_deletions.py
new file mode 100755
index 0000000..a93d145
--- /dev/null
+++ b/3-analyze-sv-calls/annotate_deletions.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python3
+"""
+Given a vcf file containing deletions and a gff annotating the
+reference genome, annotate each of the deletions as affecting either
+an intergenic region, a regulatory region (+/- 2kb of a gene), an
+intron, a coding sequence, or some combination of these.
+"""
+
+import argparse
+import itertools
+import os
+import sys
+
+import gffutils
+import vcf
+
+
+def gff_type(gff_path):
+    """
+    argparse type function for GFF files. Uses gffutils to create
+    a database if one does not yet exist, and then returns that
+    database.
+    """
+    db_path = gff_path + ".db"
+    if gff_path.split(".")[-1] == "db":
+        return gffutils.FeatureDB(gff_path)
+    elif os.path.exists(db_path):
+        return gffutils.FeatureDB(db_path)
+    else:
+        print("Creating gff db...", file=sys.stderr)
+        return gffutils.create_db(
+            gff_path,
+            db_path,
+            # the id_spec is necessary because NCBI gff's do not follow
+            # the GFF specification
+            id_spec={"gene": "db_xref"},
+        )
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "-r",
+        "--regulatory-margin",
+        type=int,
+        default=2000,
+        help="distance up/downstream of a gene to consider "
+        "as affecting that gene, in bp [2000]",
+    )
+    parser.add_argument(
+        "vcf", help="the vcf file to annotate", type=lambda f: vcf.Reader(filename=f)
+    )
+    parser.add_argument(
+        "gff",
+        help="annotations of the reference genome, "
+        "either in NCBI gff or a pre-created sqlite db",
+        type=gff_type,
+    )
+    return parser.parse_args()
+
+
+def get_deletion_effects(deletion_record, gff_db, regulatory_margin=2000):
+    """
+    Figure out the effects of a deletion using a gff.
+
+    Args:
+        deletion_record (vcf.Record): a vcf Record representing the
+            deletion
+        gff_db (gffutils.FeatureDB): a gffutils DB of a genome's
+            annotations
+        regulatory_margin (int): the amount of sequence on either side
+            of the deletion to look in for genes to be classified as
+            having their regulatory regions affected
+    """
+    affected_genes = set()
+    intergenic = True
+    regulatory, intronic, coding = [False] * 3
+
+    # first, go through all the features that overlap the deletion
+    # and use them to set the above booleans and add any affected
+    # genes to affected_genes
+    features_in_deletion = gff_db.region(
+        seqid=deletion_record.CHROM,
+        start=deletion_record.POS,
+        end=deletion_record.sv_end,
+    )
+    for feature in features_in_deletion:
+        if feature.featuretype == "gene":
+            affected_genes.add(feature.attributes["Name"][0].upper())
+            intergenic = False
+            intronic = True
+        elif feature.featuretype == "CDS":
+            coding = True
+            intronic = False
+
+    # next, look for any genes *near* the deletion
+    features_near_deletion = itertools.chain(
+        gff_db.region(
+            seqid=deletion_record.CHROM,
+            start=deletion_record.POS - regulatory_margin,
+            end=deletion_record.POS,
+        ),
+        gff_db.region(
+            seqid=deletion_record.CHROM,
+            start=deletion_record.sv_end,
+            end=deletion_record.sv_end + regulatory_margin,
+        ),
+    )
+    for feature in features_near_deletion:
+        if feature.featuretype == "gene":
+            gene_name = feature.attributes["Name"][0].upper()
+            # only consider this a deletion of a regulatory region if
+            # this gene has not been otherwise affected
+            if gene_name not in affected_genes:
+                regulatory = True
+                intergenic = False
+                affected_genes.add(gene_name)
+
+    return affected_genes, intergenic, regulatory, intronic, coding
+
+
+def annotate_deletion(record, affected_genes, intergenic, regulatory, intronic, coding):
+    """ adds INFO fields to a vcf record """
+    record.INFO["affected_genes"] = list(affected_genes)
+    # all these if statements are necessary because pyvcf adds
+    # unnecessary semicolons for false flags and empty fields
+    if intergenic:
+        record.INFO["intergenic"] = True
+    if regulatory:
+        record.INFO["regulatory"] = True
+    if intronic:
+        record.INFO["intronic"] = True
+    if coding:
+        record.INFO["coding"] = True
+    return record
+
+
+def add_info_fields_to_header(vcf_reader):
+    """
+    PyVCF uses a vcf.Reader to get a header and output it when a
+    vcf.Writer is created. This function takes a Reader and adds
+    some INFO fields to its header so that it can be used as a
+    template for the vcf this program outputs.
+
+    Args:
+        vcf_reader (vcf.Reader): reader with a file whose header we
+            want to add INFO fields to
+
+    Returns:
+        vcf_reader (vcf.Reader): the same reader that was input, but
+            with some new INFO fields
+    """
+    vcf_reader.infos["affected_genes"] = vcf.parser._Info(
+        id="affected_genes",
+        num=".",
+        type="String",
+        desc="List of genes affected by this deletion",
+        source=None,
+        version=None,
+    )
+    vcf_reader.infos["intergenic"] = vcf.parser._Info(
+        id="intergenic",
+        num="0",
+        type="Flag",
+        desc="This deletion does not affect any genes",
+        source=None,
+        version=None,
+    )
+    vcf_reader.infos["regulatory"] = vcf.parser._Info(
+        id="regulatory",
+        num="0",
+        type="Flag",
+        desc="This deletion occurs directly up- or downstream of gene(s)",
+        source=None,
+        version=None,
+    )
+    vcf_reader.infos["intronic"] = vcf.parser._Info(
+        id="intronic",
+        num="0",
+        type="Flag",
+        desc="This deletion affects the introns of one or more genes",
+        source=None,
+        version=None,
+    )
+    vcf_reader.infos["coding"] = vcf.parser._Info(
+        id="coding",
+        num="0",
+        type="Flag",
+        desc="This deletion affects the coding sequence of one or more genes",
+        source=None,
+        version=None,
+    )
+    return vcf_reader
+
+
+def main():
+    """ __main__ method for this file """
+    args = parse_args()
+
+    writer = vcf.Writer(sys.stdout, add_info_fields_to_header(args.vcf))
+    for record in args.vcf:
+        effects_tuple = get_deletion_effects(record, args.gff, args.regulatory_margin)
+        annotated_record = annotate_deletion(record, *effects_tuple)
+        writer.write_record(annotated_record)
+    writer.close()
+
+
+if __name__ == "__main__":
+    main()
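Note: on the first run, gff_type() builds a gffutils sqlite database next to the input GFF (<gff>.db), which can take a while for a whole-genome annotation; later runs can pass the .db file directly. A sketch with assumed file names:

    ./annotate_deletions.py merged_deletions.vcf.gz AstMex.gff \
        | bgzip > merged_annotated.vcf.gz    # creates AstMex.gff.db as a side effect
    ./annotate_deletions.py merged_deletions.vcf.gz AstMex.gff.db \
        | bgzip > merged_annotated.vcf.gz    # reuses the existing database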
diff --git a/3-analyze-sv-calls/commands.sh b/3-analyze-sv-calls/commands.sh
new file mode 100644
index 0000000..e8284ff
--- /dev/null
+++ b/3-analyze-sv-calls/commands.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# merge lumpy and manta deletions
+./merge_deletions.py \
+    ../2-filter-sv-calls/lumpy_deletions_only.vcf.gz \
+    ../2-filter-sv-calls/manta_deletions_only.vcf.gz | bgzip \
+    > merged_deletions.vcf.gz
+
+# count number of deletions per sample
+./count_variants_per_sample.py \
+    ../2-filter-sv-calls/lumpy_deletions_only.vcf.gz > lumpy_deletion_counts.tsv
+./count_variants_per_sample.py \
+    ../2-filter-sv-calls/manta_deletions_only.vcf.gz > manta_deletion_counts.tsv
+./count_variants_per_sample.py \
+    merged_deletions.vcf.gz > merged_deletion_counts.tsv
+
+# annotate the vcf
+./annotate_deletions.py merged_deletions.vcf.gz ../AstMex.db \
+    | bgzip > merged_annotated.vcf.gz
+
+# filter the vcf to count only deletions of intronic sequence
+bcftools filter -i 'intronic=1' -Oz merged_annotated.vcf.gz \
+    > intronic_deletions.vcf.gz
+./count_variants_per_sample.py \
+    intronic_deletions.vcf.gz > intron_deletion_counts.tsv
+
+# filter the vcf to count only deletions of regulatory sequence
+bcftools filter -i 'regulatory=1' -Oz merged_annotated.vcf.gz \
+    > regulatory_deletions.vcf.gz
+./count_variants_per_sample.py \
+    regulatory_deletions.vcf.gz > regulatory_deletion_counts.tsv
+
+# filter the vcf to count only deletions of coding sequence
+bcftools filter -i 'coding=1' -Oz merged_annotated.vcf.gz \
+    > coding_deletions.vcf.gz
+./count_variants_per_sample.py \
+    coding_deletions.vcf.gz > coding_deletion_counts.tsv
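Note: the bare 'intronic=1'-style expressions work because annotate_deletions.py declares intergenic/regulatory/intronic/coding as INFO flags in the output header. A hypothetical way to confirm the flags landed before filtering:

    bcftools view -h merged_annotated.vcf.gz | grep '##INFO=<ID=intronic'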
diff --git a/3-analyze-sv-calls/count_variants_per_sample.py b/3-analyze-sv-calls/count_variants_per_sample.py
new file mode 100755
index 0000000..3b6b400
--- /dev/null
+++ b/3-analyze-sv-calls/count_variants_per_sample.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+Count the number of called variants per sample in a VCF file.
+"""
+
+import argparse
+import collections
+
+import vcf
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "vcf", help="the vcf file to analyze", type=lambda f: vcf.Reader(filename=f)
+    )
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+
+    call_counts = collections.Counter()
+    hom_alt_counts = collections.Counter()
+    het_counts = collections.Counter()
+    for record in filter(lambda r: not r.is_filtered, args.vcf):
+        for call in filter(lambda s: not s.is_filtered, record.samples):
+            call_counts[call.sample] += 1
+            if call.is_variant:
+                if call.is_het:
+                    het_counts[call.sample] += 1
+                else:
+                    hom_alt_counts[call.sample] += 1
+
+    print("\t".join(["sample", "call_count", "hom_alt_count", "het_count"]))
+    for sample in call_counts.keys():
+        print(
+            "\t".join(
+                map(
+                    str,
+                    [
+                        sample,
+                        call_counts[sample],
+                        hom_alt_counts[sample],
+                        het_counts[sample],
+                    ],
+                )
+            )
+        )
+
+
+if __name__ == "__main__":
+    main()
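Note: the script prints a four-column TSV (sample, call_count, hom_alt_count, het_count), splitting het from hom-alt using PyVCF's per-call genotype classification. One illustrative way to skim the largest counts (purely hypothetical):

    sort -t$'\t' -k2,2nr merged_deletion_counts.tsv | column -t | head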
+ """ + # figure out which record is bigger, so that we can use it as the + # template for the new record + small_record, big_record = sorted( + [record1, record2], key=lambda r: r.sv_end - r.POS + ) + + # sort the calls by sample index, and then merge each pair of calls + # for the same sample into a single call + big_record.samples = list( + starmap( + merge_calls, + zip( + sorted(big_record.samples, key=sample_index_key), + sorted(small_record.samples, key=sample_index_key), + ), + ) + ) + big_record.FORMAT = "GT" + + return big_record + + +def merge_all_deletions(reader1, reader2, min_reciprocal_overlap=0.5): + # god help us if the VCFs have headers in different orders + contig_index = {k: i for i, k in enumerate(reversed(reader1.contigs.keys()))} + + # keep the samples in the order in which they appear in the reader1 + # header + sample_index = {k: i for i, k in enumerate(reader1.samples)} + sample_index_key = make_sample_index_key(sample_index) + + try: + record1, record2 = next(reader1), next(reader2) + while True: # TODO please don't do this + while compare_records(record1, record2, contig_index) < 0: + record1 = next(reader1) + while compare_records(record1, record2, contig_index) > 0: + record2 = next(reader2) + overlap = reciprocal_overlap(record1, record2) + if overlap >= min_reciprocal_overlap: + yield merge_records(record1, record2, sample_index_key) + record1 = next(reader1) + record2 = next(reader2) + except StopIteration: + return + + +def main(): + args = parse_args() + + writer = vcf.Writer(sys.stdout, args.vcf1) + # TODO fix output header + for record in merge_all_deletions(args.vcf1, args.vcf2, args.reciprocal_overlap): + writer.write_record(record) + writer.flush() + writer.close() + + +if __name__ == "__main__": + main() diff --git a/README.md b/README.md index cc6e389..b4a1629 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,34 @@ -# cavefish-genome-paper +# astyanax-genome-paper Scripts and workflows used in Warren et al. (2021) + +This repository contains scripts and workflow used to analyze deletions in the +paper: Warren et al. (2021) "A chromosome level genome of _Astyanax mexicanus_ +surface fish for comparing population-specific genetic differences contributing +to trait evolution." _Nature Communications_ (in press) + +## Generating structural variant calls +We generated structural variant calls with short reads for all of the libraries +listed in `library_list.txt`, using two short-read SV-callers: +[https://github.com/Illumina/manta](manta) and +[https://github.com/arq5x/lumpy-sv](lumpy). For lumpy, we used the +[https://github.com/brentp/smoove](smoove) pipeline as recommended. +[https://www.nextflow.io/](nextflow) workflows for both are in +`1-generate-sv-calls`. Please refer to the webpages for these software packages +for requirements and installation instructions. + +## Filtering structural variant calls +We filtered the structural variant calls generated by manta and lumpy to +include only deletions within the size range (500,100000). A shell script for +this is in `2-filter-sv-calls`. + +## Analyzing and counting structural variant calls +We analyzed the deletion calls by counting the numbers of deletions per sample, +merging the lumpy and manta results, and annotating the deletions based on +whether they contain intronic, regulatory, or coding sequence, as described in +the manuscript. Python scripts for counting, merging, and annotating are in +`3-analyze-sv-calls`, as well as a bash script containing the commands we used +to run these programs. 
diff --git a/README.md b/README.md
index cc6e389..b4a1629 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,34 @@
-# cavefish-genome-paper
+# astyanax-genome-paper
 Scripts and workflows used in Warren et al. (2021)
+
+This repository contains the scripts and workflows used to analyze deletions in
+the paper: Warren et al. (2021) "A chromosome level genome of _Astyanax
+mexicanus_ surface fish for comparing population-specific genetic differences
+contributing to trait evolution." _Nature Communications_ (in press)
+
+## Generating structural variant calls
+We generated structural variant calls from short reads for all of the libraries
+listed in `library_list.txt`, using two short-read SV callers:
+[manta](https://github.com/Illumina/manta) and
+[lumpy](https://github.com/arq5x/lumpy-sv). For lumpy, we used the
+[smoove](https://github.com/brentp/smoove) pipeline as recommended.
+[Nextflow](https://www.nextflow.io/) workflows for both are in
+`1-generate-sv-calls`. Please refer to the webpages for these software packages
+for requirements and installation instructions.
+
+## Filtering structural variant calls
+We filtered the structural variant calls generated by manta and lumpy to
+include only deletions between 500 bp and 100 kb. A shell script for this is in
+`2-filter-sv-calls`.
+
+## Analyzing and counting structural variant calls
+We analyzed the deletion calls by counting the number of deletions per sample,
+merging the lumpy and manta results, and annotating the deletions based on
+whether they affect intronic, regulatory, or coding sequence, as described in
+the manuscript. Python scripts for counting, merging, and annotating are in
+`3-analyze-sv-calls`, as well as a bash script containing the commands we used
+to run these programs.
+
+## More information
+Please consult the paper for more information about the methods and results,
+and direct questions to the corresponding authors.
diff --git a/library_list.txt b/library_list.txt
new file mode 100644
index 0000000..fbb3481
--- /dev/null
+++ b/library_list.txt
@@ -0,0 +1,43 @@
+SRR1575270
+SRR1575271
+SRR1575272
+SRR1575273
+SRR1575274
+SRR1575275
+SRR1575276
+SRR1575277
+SRR1575278
+SRR1575279
+SRR1575280
+SRR1575281
+SRR1575282
+SRR1575283
+SRR1575284
+SRR1575285
+SRR1575286
+SRR1575287
+SRR1575288
+SRR1575289
+SRR1575290
+SRR1575291
+SRR1575292
+SRR1575293
+SRR1575294
+SRR1575295
+SRR1575296
+SRR1575297
+SRR1575298
+SRR1927184
+SRR1927212
+SRR1927214
+SRR1927215
+SRR1927218
+SRR1927221
+SRR1927224
+SRR1927228
+SRR1927232
+SRR1927233
+SRR1927234
+SRR1927235
+SRR1927236
+SRR1927237
diff --git a/sample_keys.tsv b/sample_keys.tsv
new file mode 100644
index 0000000..e2afbbf
--- /dev/null
+++ b/sample_keys.tsv
@@ -0,0 +1,43 @@
+SRR1575270	SAMPLE25	Astyanax mexicanus	Choy_01
+SRR1575271	SAMPLE36	Astyanax mexicanus	Choy_05
+SRR1575272	SAMPLE12	Astyanax mexicanus	Choy_06
+SRR1575273	SAMPLE1	Astyanax mexicanus	Choy_09
+SRR1575274	SAMPLE34	Astyanax mexicanus	Choy_10
+SRR1575275	SAMPLE31	Astyanax mexicanus	Choy_11
+SRR1575276	SAMPLE4	Astyanax mexicanus	Choy_12
+SRR1575277	SAMPLE29	Astyanax mexicanus	Choy_13
+SRR1575278	SAMPLE21	Astyanax mexicanus	Choy_14
+SRR1575279	SAMPLE30	Astyanax mexicanus	Pach_3
+SRR1575280	SAMPLE33	Astyanax mexicanus	Pach_7
+SRR1575281	SAMPLE32	Astyanax mexicanus	Pach_8
+SRR1575282	SAMPLE6	Astyanax mexicanus	Pach_9
+SRR1575283	SAMPLE8	Astyanax mexicanus	Pach_11
+SRR1575284	SAMPLE38	Astyanax mexicanus	Pach_12
+SRR1575285	SAMPLE23	Astyanax mexicanus	Pach_14
+SRR1575286	SAMPLE2	Astyanax mexicanus	Pach_15
+SRR1575287	SAMPLE27	Astyanax mexicanus	Pach_17
+SRR1575288	SAMPLE42	Astyanax mexicanus	Molino_2a
+SRR1575289	SAMPLE41	Astyanax mexicanus	Molino_7a
+SRR1575290	SAMPLE7	Astyanax mexicanus	Molino_9b
+SRR1575291	SAMPLE19	Astyanax mexicanus	Molino_10b
+SRR1575292	SAMPLE18	Astyanax mexicanus	Molino_11a
+SRR1575293	SAMPLE20	Astyanax mexicanus	Molino_12a
+SRR1575294	SAMPLE13	Astyanax mexicanus	Molino_13b
+SRR1575295	SAMPLE16	Astyanax mexicanus	Molino_14a
+SRR1575296	SAMPLE11	Astyanax mexicanus	Molino_15b
+SRR1575297	SAMPLE28	Astyanax mexicanus	Rascon_02
+SRR1575298	SAMPLE26	Astyanax mexicanus	Rascon_04
+SRR1927184	SAMPLE5	Astyanax mexicanus	Tinaja_11
+SRR1927212	SAMPLE39	Astyanax mexicanus	Tinaja_6
+SRR1927214	SAMPLE14	Astyanax mexicanus	Tinaja_12
+SRR1927215	SAMPLE15	Astyanax mexicanus	Tinaja_B
+SRR1927218	SAMPLE22	Astyanax mexicanus	Tinaja_2
+SRR1927221	SAMPLE37	Astyanax mexicanus	Tinaja_C
+SRR1927224	SAMPLE43	Astyanax mexicanus	Tinaja_3
+SRR1927228	SAMPLE35	Astyanax mexicanus	Tinaja_D
+SRR1927232	SAMPLE10	Astyanax mexicanus	Tinaja_5
+SRR1927233	SAMPLE3	Astyanax mexicanus	Tinaja_E
+SRR1927234	SAMPLE40	Astyanax mexicanus	Rascon_13
+SRR1927235	SAMPLE17	Astyanax mexicanus	Rascon_15
+SRR1927236	SAMPLE9	Astyanax mexicanus	Rascon_8
+SRR1927237	SAMPLE24	Astyanax mexicanus	Rascon_6