From be37a41d6f83e4145bd4912cbe8bf6a24af80c29 Mon Sep 17 00:00:00 2001 From: Augustin Zidek Date: Fri, 5 Nov 2021 02:38:25 -0700 Subject: [PATCH] Explain better how to run the AlphaFold-Multimer system. * Remove a confusing example that folds multiple complexes at once. * Add examples on how to create the multimer input FASTA files. * Add a note about the `model_preset` flag to the API changes. PiperOrigin-RevId: 407774039 Change-Id: I80277c47febc977ba3956d0615944e62b0d5c3fc --- README.md | 118 +++++++++++++++++++++++++++++++++++++++++-- docker/run_docker.py | 18 +++---- run_alphafold.py | 24 ++++----- 3 files changed, 135 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 4b53bb3ee..3eadf0ee3 100644 --- a/README.md +++ b/README.md @@ -196,6 +196,10 @@ change the following: happens inside the Multimer model. * The `preset` flag in `run_alphafold.py` and `run_docker.py` was split into `db_preset` and `model_preset`. +* The models to use are not specified using `model_names` but rather using the + `model_preset` flag. If you want to customize which models are used for each + preset, you will have to modify the `MODEL_PRESETS` dictionary in + `alphafold/model/config.py`. * Setting the `data_dir` flag is now needed when using `run_docker.py`. @@ -299,18 +303,124 @@ All steps are the same as when running the monomer system, but you will have to whether all input sequences in the given fasta file are prokaryotic. If that is not the case or the origin is unknown, set to `false` for that fasta. 
-An example that folds two protein complexes `multimer1` and `multimer2` where -the first is prokaryotic and the second isn't: +An example that folds a protein complex `multimer.fasta` that is prokaryotic: ```bash python3 docker/run_docker.py \ - --fasta_paths=multimer1.fasta,multimer2.fasta \ - --is_prokaryote_list=true,false \ + --fasta_paths=multimer.fasta \ + --is_prokaryote_list=true \ --max_template_date=2020-05-14 \ --model_preset=multimer \ --data_dir=$DOWNLOAD_DIR ``` +### Examples + +Below are examples on how to use AlphaFold in different scenarios. + +#### Folding a monomer + +Say we have a monomer with the sequence `<SEQUENCE>`. The input fasta should be: + +```fasta +>sequence_name +<SEQUENCE> +``` + +Then run the following command: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=monomer.fasta \ + --max_template_date=2021-11-01 \ + --model_preset=monomer \ + --data_dir=$DOWNLOAD_DIR +``` + +#### Folding a homomer + +Say we have a homomer from a prokaryote with 3 copies of the same sequence +`<SEQUENCE>`. The input fasta should be: + +```fasta +>sequence_1 +<SEQUENCE> +>sequence_2 +<SEQUENCE> +>sequence_3 +<SEQUENCE> +``` + +Then run the following command: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=homomer.fasta \ + --is_prokaryote_list=true \ + --max_template_date=2021-11-01 \ + --model_preset=multimer \ + --data_dir=$DOWNLOAD_DIR +``` + +#### Folding a heteromer + +Say we have a heteromer A2B3 of unknown origin, i.e. with 2 copies of +`<SEQUENCE A>` and 3 copies of `<SEQUENCE B>`. The input fasta should be: + +```fasta +>sequence_1 +<SEQUENCE A> +>sequence_2 +<SEQUENCE A> +>sequence_3 +<SEQUENCE B> +>sequence_4 +<SEQUENCE B> +>sequence_5 +<SEQUENCE B> +``` + +Then run the following command: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=heteromer.fasta \ + --is_prokaryote_list=false \ + --max_template_date=2021-11-01 \ + --model_preset=multimer \ + --data_dir=$DOWNLOAD_DIR +``` + +#### Folding multiple monomers one after another + +Say we have two monomers, `monomer1.fasta` and `monomer2.fasta`. 
+ +We can fold both sequentially by using the following command: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=monomer1.fasta,monomer2.fasta \ + --max_template_date=2021-11-01 \ + --model_preset=monomer \ + --data_dir=$DOWNLOAD_DIR +``` + +#### Folding multiple multimers one after another + +Say we have two multimers, `multimer1.fasta` and `multimer2.fasta`. Both are +from a prokaryotic organism. + +We can fold both sequentially by using the following command: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=multimer1.fasta,multimer2.fasta \ + --is_prokaryote_list=true,true \ + --max_template_date=2021-11-01 \ + --model_preset=multimer \ + --data_dir=$DOWNLOAD_DIR +``` + ### AlphaFold output The outputs will be saved in a subdirectory of the directory provided via the diff --git a/docker/run_docker.py b/docker/run_docker.py index 4eec39c9e..5d0f9beb0 100644 --- a/docker/run_docker.py +++ b/docker/run_docker.py @@ -32,17 +32,17 @@ 'gpu_devices', 'all', 'Comma separated list of devices to pass to NVIDIA_VISIBLE_DEVICES.') flags.DEFINE_list( - 'fasta_paths', None, - 'Paths to FASTA files, each containing one sequence. Paths should be ' + 'fasta_paths', None, 'Paths to FASTA files, each containing a prediction ' + 'target that will be folded one after another. If a FASTA file contains ' + 'multiple sequences, then it will be folded as a multimer. Paths should be ' 'separated by commas. All FASTA paths must have a unique basename as the ' 'basename is used to name the output directories for each prediction.') -flags.DEFINE_list('is_prokaryote_list', None, 'Optional for multimer system, ' - 'not used by the single chain system. ' - 'This list should contain a boolean for each fasta ' - 'specifying true where the target complex is from a ' - 'prokaryote, and false where it is not, or where the ' - 'origin is unknown. 
These values determine the pairing ' - 'method for the MSA.') +flags.DEFINE_list( + 'is_prokaryote_list', None, 'Optional for multimer system, not used by the ' + 'single chain system. This list should contain a boolean for each fasta ' + 'specifying true where the target complex is from a prokaryote, and false ' + 'where it is not, or where the origin is unknown. These values determine ' + 'the pairing method for the MSA.') flags.DEFINE_string( 'output_dir', '/tmp/alphafold', 'Path to a directory that will store the results.') diff --git a/run_alphafold.py b/run_alphafold.py index 1d5403c1c..33fae99c8 100644 --- a/run_alphafold.py +++ b/run_alphafold.py @@ -43,18 +43,18 @@ logging.set_verbosity(logging.INFO) -flags.DEFINE_list('fasta_paths', None, 'Paths to FASTA files, each containing ' - 'a prediction target. Paths should be separated by commas. ' - 'All FASTA paths must have a unique basename as the ' - 'basename is used to name the output directories for ' - 'each prediction.') -flags.DEFINE_list('is_prokaryote_list', None, 'Optional for multimer system, ' - 'not used by the single chain system. ' - 'This list should contain a boolean for each fasta ' - 'specifying true where the target complex is from a ' - 'prokaryote, and false where it is not, or where the ' - 'origin is unknown. These values determine the pairing ' - 'method for the MSA.') +flags.DEFINE_list( + 'fasta_paths', None, 'Paths to FASTA files, each containing a prediction ' + 'target that will be folded one after another. If a FASTA file contains ' + 'multiple sequences, then it will be folded as a multimer. Paths should be ' + 'separated by commas. All FASTA paths must have a unique basename as the ' + 'basename is used to name the output directories for each prediction.') +flags.DEFINE_list( + 'is_prokaryote_list', None, 'Optional for multimer system, not used by the ' + 'single chain system. 
This list should contain a boolean for each fasta ' + 'specifying true where the target complex is from a prokaryote, and false ' + 'where it is not, or where the origin is unknown. These values determine ' + 'the pairing method for the MSA.') flags.DEFINE_string('data_dir', None, 'Path to directory of supporting data.') flags.DEFINE_string('output_dir', None, 'Path to a directory that will '