Fix install on M1 chips. Improve argument parsing. Updated README.

AI-sandbox · Jun 7, 2022 · 8f6c847 · 8f6c847
1 parent ba220a4
commit 8f6c847
Show file tree

Hide file tree

Showing 7 changed files with 34 additions and 41 deletions.
diff --git a/README.md b/README.md
@@ -20,22 +20,22 @@ The successful usage of this package requires a computer with enough RAM to be a
 
 ### Software requirements
 
-The package has been tested on both Linux (CentOS 7.9.2009, Ubuntu 18.04.5 LTS) and MacOS (BigSur 11.2.3, Intel). If using GPUs, make sure CUDA drivers are properly installed.
+The package has been tested on both Linux (CentOS 7.9.2009, Ubuntu 18.04.5 LTS) and MacOS (Big Sur 11.2.3 on Intel, and Monterey 12.3.1 on M1). If using GPUs, make sure CUDA drivers are properly installed.
 
 We recommend creating a fresh Python 3.9 environment using `virtualenv` (or `conda`), and then installing the package `neural-admixture` there. As an example, for `virtualenv`, one should launch the following commands:
 
 ```console
 > virtualenv --python=python3.9 ~/venv/nadmenv
 > source ~/venv/nadmenv/bin/activate
-(nadmenv) > pip3 install neural-admixture
+(nadmenv) > pip install neural-admixture
 ```
 
 ## Installation Guide
 
 The package can be easily installed in at most a few minutes using `pip` (make sure to add the `--upgrade` flag if updating the version):
 
 ```console
-(nadmenv) > pip3 install neural-admixture
+(nadmenv) > pip install neural-admixture
 ```
 
 ## Usage 
@@ -113,7 +113,7 @@ For this command to work, files `./outputs/nadm_test.pt` and `./outputs/nadm_tes
 
 As also mentioned in the paper, Neural ADMIXTURE can be used to learn a function (through the encoder) given the results of the frequency matrix `P`/`F` of another algorithm so out-of-training data inference can be performed using the structure learnt by the other algorithm. The following arguments should be used to run Neural ADMIXTURE in this mode:
 
-- `--initialization pretrained`: indicates than an ADMIXTURE-like `.P` file must be loaded to initialize the decoder weights. The path of the file is specified using the `--init_file` argument.
+- `--initialization pretrained`: indicates that an ADMIXTURE-like `.P` file must be loaded to initialize the decoder weights. The path of the file is specified using the `--init_file` argument.
 - `--freeze_decoder`: indicates that the decoder weights will be frozen during training.
 
 If the second argument is skipped, then the decoder weights will be updated and the solution won't yield exactly the same `P`/`F` matrix that was used as input.

diff --git a/neural_admixture/entry.py b/neural_admixture/entry.py
@@ -1,17 +1,16 @@
-import argparse
 import logging
 import sys
 
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 log = logging.getLogger(__name__)
 
 def main():
-    assert len(sys.argv) > 1, 'Please provide either the argument "train" or "infer" to choose running mode.'
+    assert len(sys.argv) > 2, 'Please provide either the argument "train" or "infer" to choose running mode.'
     if sys.argv[1] == 'train':
         from src import train
-        sys.exit(train.main())
+        sys.exit(train.main(sys.argv[2:]))
     if sys.argv[1] == 'infer':
         from src import inference
-        sys.exit(inference.main())
+        sys.exit(inference.main(sys.argv[2:]))
     log.error(f'Invalid argument {sys.argv[1]}. Please run either "neural-admixture train" or "neural-admixture infer"')
-    sys.exit(1)
+    sys.exit(1)
diff --git a/neural_admixture/src/inference.py b/neural_admixture/src/inference.py
@@ -8,8 +8,8 @@
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 log = logging.getLogger(__name__)
 
-def main():
-    args = utils.parse_infer_args()
+def main(argv):
+    args = utils.parse_infer_args(argv)
     log.info('Will use GPU' if torch.cuda.is_available() else 'No GPUs available.')
     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
     data_file_str = args.data_path
@@ -40,6 +40,3 @@ def main():
     log.info('Exiting...')
     logging.shutdown()
     return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/neural_admixture/src/train.py b/neural_admixture/src/train.py
@@ -93,8 +93,8 @@ def fit_model(trX, args, valX=None, trY=None, valY=None):
     log.info('Optimization process finished.')
     return model, device
 
-def main():
-    args = utils.parse_train_args()
+def main(argv):
+    args = utils.parse_train_args(argv)
     tr_file, val_file = args.data_path, args.validation_data_path
     tr_pops_f, val_pops_f = args.populations_path, args.validation_populations_path
 
@@ -108,5 +108,3 @@ def main():
     logging.shutdown()
     return 0
 
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/neural_admixture/src/utils.py b/neural_admixture/src/utils.py
@@ -1,22 +1,19 @@
 import argparse
-import gc
 import logging
 import numpy as np
 import os
 import sys
-import time
 import torch
 import wandb
-from itertools import permutations
 
 from src.snp_reader import SNPReader
 
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 log = logging.getLogger(__name__)
 
-def parse_train_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('mode', choices=['train', 'infer'], help='Choose between modes.')
+def parse_train_args(argv):
+    parser = argparse.ArgumentParser(prog='neural-admixture train',
+                                     description='Rapid population clustering with autoencoders - training mode')
     parser.add_argument('--learning_rate', required=False, default=0.0001, type=float, help='Learning rate')
     parser.add_argument('--max_epochs', required=False, type=int, default=50, help='Maximum number of epochs')
     parser.add_argument('--initialization', required=False, type=str, default = 'pckmeans',
@@ -48,17 +45,17 @@ def parse_train_args():
     parser.add_argument('--name', required=True, type=str, help='Experiment/model name')
     parser.add_argument('--batch_size', required=False, default=400, type=int, help='Batch size')
     parser.add_argument('--supervised_loss_weight', required=False, default=0.05, type=float, help='Weight given to the supervised loss')
-    return parser.parse_args()
+    return parser.parse_args(argv)
 
-def parse_infer_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('mode', choices=['train', 'infer'], help='Choose between modes')
+def parse_infer_args(argv):
+    parser = argparse.ArgumentParser(prog='neural-admixture infer',
+                                     description='Rapid population clustering with autoencoders - inference mode')
     parser.add_argument('--out_name', required=True, type=str, help='Name used to output files on inference mode')
     parser.add_argument('--save_dir', required=True, type=str, help='Load model from this directory')
     parser.add_argument('--data_path', required=True, type=str, help='Path containing the main data')
     parser.add_argument('--name', required=True, type=str, help='Trained experiment/model name')
     parser.add_argument('--batch_size', required=False, default=400, type=int, help='Batch size')
-    return parser.parse_args()
+    return parser.parse_args(argv)
 
 def initialize_wandb(run_name, trX, valX, args, out_path, silent=True):
     if run_name is None:

diff --git a/requirements.txt b/requirements.txt
@@ -1,12 +1,13 @@
+Cython>=0.29.30
 codetiming==1.3.0
-h5py==3.1.0
+h5py>=3.1.0
 matplotlib==3.3.4
+numpy>=1.21.0
 pandas==1.2.4
 pandas_plink==2.2.9
 py_pcha==0.1.3
 scikit-allel==1.3.5
-scikit-learn==0.24.1
+scikit-learn>=0.24.1
 setuptools==50.3.1
-torch==1.7.1
-twine
-wandb==0.10.21
+torch>=1.7.1
+wandb>=0.12.17
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 
 setup(
     name='neural-admixture',
-    version='1.1.2',
+    version='1.1.4',
     long_description=(Path(__file__).parent / 'README.md').read_text(),
     long_description_content_type='text/markdown',
     description='Population clustering with autoencoders',
@@ -17,23 +17,24 @@
     packages=find_packages('neural_admixture/')+['.'],
     package_dir={"": "neural_admixture"},
     python_requires=">=3.8",
-    install_requires=['codetiming==1.3.0',
-                      'h5py==3.1.0',
+    install_requires=['Cython>=0.29.30',
+                      'codetiming==1.3.0',
+                      'h5py>=3.1.0',
                       'matplotlib==3.3.4',
+                      'numpy>=1.21.0',
                       'pandas==1.2.4',
                       'pandas_plink==2.2.9',
                       'py_pcha==0.1.3',
                       'scikit-allel==1.3.5',
-                      'scikit-learn==0.24.1',
+                      'scikit-learn>=0.24.1',
                       'setuptools==50.3.1',
-                      'torch==1.7.1',
-                      'wandb==0.10.21'],
+                      'torch>=1.7.1',
+                      'wandb>=0.12.17'],
     classifiers=[
         'Development Status :: 5 - Production/Stable',
         'Intended Audience :: Science/Research',
         'Operating System :: POSIX :: Linux',
         'Operating System :: MacOS',
-        'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
     ],
 )