From 8f6c847618d5964250a29e7b15308758466c2796 Mon Sep 17 00:00:00 2001 From: AlbertDominguez Date: Mon, 6 Jun 2022 17:42:09 -0700 Subject: [PATCH] Fix install on M1 chips. Improve argument parsing. Updated README. --- README.md | 8 ++++---- neural_admixture/entry.py | 9 ++++----- neural_admixture/src/inference.py | 7 ++----- neural_admixture/src/train.py | 6 ++---- neural_admixture/src/utils.py | 19 ++++++++----------- requirements.txt | 11 ++++++----- setup.py | 15 ++++++++------- 7 files changed, 34 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index b447df5..a5bfde5 100644 --- a/README.md +++ b/README.md @@ -20,14 +20,14 @@ The successful usage of this package requires a computer with enough RAM to be a ### Software requirements -The package has been tested on both Linux (CentOS 7.9.2009, Ubuntu 18.04.5 LTS) and MacOS (BigSur 11.2.3, Intel). If using GPUs, make sure CUDA drivers are properly installed. +The package has been tested on both Linux (CentOS 7.9.2009, Ubuntu 18.04.5 LTS) and MacOS (BigSur 11.2.3, Intel and Monterey 12.3.1, M1). If using GPUs, make sure CUDA drivers are properly installed. We recommend creating a fresh Python 3.9 environment using `virtualenv` (or `conda`), and then install the package `neural-admixture` there. As an example, for `virtualenv`, one should launch the following commands: ```console > virtualenv --python=python3.9 ~/venv/nadmenv > source ~/venv/nadmenv/bin/activate -(nadmenv) > pip3 install neural-admixture +(nadmenv) > pip install neural-admixture ``` ## Installation Guide @@ -35,7 +35,7 @@ We recommend creating a fresh Python 3.9 environment using `virtualenv` (or `con The package can be easily installed in at most a few minutes using `pip` (make sure to add the `--upgrade` flag if updating the version): ```console -(nadmenv) > pip3 install neural-admixture +(nadmenv) > pip install neural-admixture ``` ## Usage @@ -113,7 +113,7 @@ For this command to work, files `./outputs/nadm_test.pt` and `./outputs/nadm_tes As also mentioned in the paper, Neural ADMIXTURE can be used to learn a function (through the encoder) given the results of the frequency matrix `P`/`F` of another algorithm so out-of-training data inference can be performed using the structure learnt by the other algorithm. The following arguments should be used to run Neural ADMIXTURE in this mode: -- `--initialization pretrained`: indicates than an ADMIXTURE-like `.P` file must be loaded to initialize the decoder weights. The path of the file is specified using the `--init_file` argument. +- `--initialization pretrained`: indicates that an ADMIXTURE-like `.P` file must be loaded to initialize the decoder weights. The path of the file is specified using the `--init_file` argument. - `--freeze_decoder`: indicates that the decoder weights will be frozen during training. If the second argument is skipped, then the decoder weights will be updated and the solution won't yield exactly the same `P`/`F` matrix that was used as input. diff --git a/neural_admixture/entry.py b/neural_admixture/entry.py index cf24fd1..1ab62d4 100644 --- a/neural_admixture/entry.py +++ b/neural_admixture/entry.py @@ -1,4 +1,3 @@ -import argparse import logging import sys @@ -6,12 +5,12 @@ log = logging.getLogger(__name__) def main(): - assert len(sys.argv) > 1, 'Please provide either the argument "train" or "infer" to choose running mode.' + assert len(sys.argv) > 2, 'Please provide either the argument "train" or "infer" to choose running mode.' if sys.argv[1] == 'train': from src import train - sys.exit(train.main()) + sys.exit(train.main(sys.argv[2:])) if sys.argv[1] == 'infer': from src import inference - sys.exit(inference.main()) + sys.exit(inference.main(sys.argv[2:])) log.error(f'Invalid argument {sys.argv[1]}. Please run either "neural-admixture train" or "neural-admixture infer"') - sys.exit(1) \ No newline at end of file + sys.exit(1) diff --git a/neural_admixture/src/inference.py b/neural_admixture/src/inference.py index 04f0149..12af9f9 100644 --- a/neural_admixture/src/inference.py +++ b/neural_admixture/src/inference.py @@ -8,8 +8,8 @@ logging.basicConfig(stream=sys.stdout, level=logging.INFO) log = logging.getLogger(__name__) -def main(): - args = utils.parse_infer_args() +def main(argv): + args = utils.parse_infer_args(argv) log.info('Will use GPU' if torch.cuda.is_available() else 'No GPUs available.') device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') data_file_str = args.data_path @@ -40,6 +40,3 @@ def main(): log.info('Exiting...') logging.shutdown() return 0 - -if __name__ == '__main__': - sys.exit(main()) \ No newline at end of file diff --git a/neural_admixture/src/train.py b/neural_admixture/src/train.py index 136ffd5..85409d1 100644 --- a/neural_admixture/src/train.py +++ b/neural_admixture/src/train.py @@ -93,8 +93,8 @@ def fit_model(trX, args, valX=None, trY=None, valY=None): log.info('Optimization process finished.') return model, device -def main(): - args = utils.parse_train_args() +def main(argv): + args = utils.parse_train_args(argv) tr_file, val_file = args.data_path, args.validation_data_path tr_pops_f, val_pops_f = args.populations_path, args.validation_populations_path @@ -108,5 +108,3 @@ def main(): logging.shutdown() return 0 -if __name__ == '__main__': - sys.exit(main()) diff --git a/neural_admixture/src/utils.py b/neural_admixture/src/utils.py index e47c4df..2b80c2d 100644 --- a/neural_admixture/src/utils.py +++ b/neural_admixture/src/utils.py @@ -1,22 +1,19 @@ import argparse -import gc import logging import numpy as np import os import sys -import time import torch import wandb -from itertools import permutations from src.snp_reader import SNPReader logging.basicConfig(stream=sys.stdout, level=logging.INFO) log = logging.getLogger(__name__) -def parse_train_args(): - parser = argparse.ArgumentParser() - parser.add_argument('mode', choices=['train', 'infer'], help='Choose between modes.') +def parse_train_args(argv): + parser = argparse.ArgumentParser(prog='neural-admixture train', + description='Rapid population clustering with autoencoders - training mode') parser.add_argument('--learning_rate', required=False, default=0.0001, type=float, help='Learning rate') parser.add_argument('--max_epochs', required=False, type=int, default=50, help='Maximum number of epochs') parser.add_argument('--initialization', required=False, type=str, default = 'pckmeans', @@ -48,17 +45,17 @@ def parse_train_args(): parser.add_argument('--name', required=True, type=str, help='Experiment/model name') parser.add_argument('--batch_size', required=False, default=400, type=int, help='Batch size') parser.add_argument('--supervised_loss_weight', required=False, default=0.05, type=float, help='Weight given to the supervised loss') - return parser.parse_args() + return parser.parse_args(argv) -def parse_infer_args(): - parser = argparse.ArgumentParser() - parser.add_argument('mode', choices=['train', 'infer'], help='Choose between modes') +def parse_infer_args(argv): + parser = argparse.ArgumentParser(prog='neural-admixture infer', + description='Rapid population clustering with autoencoders - inference mode') parser.add_argument('--out_name', required=True, type=str, help='Name used to output files on inference mode') parser.add_argument('--save_dir', required=True, type=str, help='Load model from this directory') parser.add_argument('--data_path', required=True, type=str, help='Path containing the main data') parser.add_argument('--name', required=True, type=str, help='Trained experiment/model name') parser.add_argument('--batch_size', required=False, default=400, type=int, help='Batch size') - return parser.parse_args() + return parser.parse_args(argv) def initialize_wandb(run_name, trX, valX, args, out_path, silent=True): if run_name is None: diff --git a/requirements.txt b/requirements.txt index a5f90dd..ca1d18c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,13 @@ +Cython>=0.29.30 codetiming==1.3.0 -h5py==3.1.0 +h5py>=3.1.0 matplotlib==3.3.4 +numpy>=1.21.0 pandas==1.2.4 pandas_plink==2.2.9 py_pcha==0.1.3 scikit-allel==1.3.5 -scikit-learn==0.24.1 +scikit-learn>=0.24.1 setuptools==50.3.1 -torch==1.7.1 -twine -wandb==0.10.21 +torch>=1.7.1 +wandb>=0.12.17 diff --git a/setup.py b/setup.py index 2e0749b..9fffcf0 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name='neural-admixture', - version='1.1.2', + version='1.1.4', long_description=(Path(__file__).parent / 'README.md').read_text(), long_description_content_type='text/markdown', description='Population clustering with autoencoders', @@ -17,23 +17,24 @@ packages=find_packages('neural_admixture/')+['.'], package_dir={"": "neural_admixture"}, python_requires=">=3.8", - install_requires=['codetiming==1.3.0', - 'h5py==3.1.0', + install_requires=['Cython>=0.29.30', + 'codetiming==1.3.0', + 'h5py>=3.1.0', 'matplotlib==3.3.4', + 'numpy>=1.21.0', 'pandas==1.2.4', 'pandas_plink==2.2.9', 'py_pcha==0.1.3', 'scikit-allel==1.3.5', - 'scikit-learn==0.24.1', + 'scikit-learn>=0.24.1', 'setuptools==50.3.1', - 'torch==1.7.1', - 'wandb==0.10.21'], + 'torch>=1.7.1', + 'wandb>=0.12.17'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Science/Research', 'Operating System :: POSIX :: Linux', 'Operating System :: MacOS', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', ], )