Skip to content

Commit

Permalink
Fix install on M1 chips. Improve argument parsing. Updated README.
Browse files Browse the repository at this point in the history
  • Loading branch information
AlbertDominguez committed Jun 7, 2022
1 parent ba220a4 commit 8f6c847
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 41 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,22 @@ The successful usage of this package requires a computer with enough RAM to be a

### Software requirements

The package has been tested on both Linux (CentOS 7.9.2009, Ubuntu 18.04.5 LTS) and MacOS (BigSur 11.2.3, Intel). If using GPUs, make sure CUDA drivers are properly installed.
The package has been tested on both Linux (CentOS 7.9.2009, Ubuntu 18.04.5 LTS) and MacOS (BigSur 11.2.3, Intel and Monterey 12.3.1, M1). If using GPUs, make sure CUDA drivers are properly installed.

We recommend creating a fresh Python 3.9 environment using `virtualenv` (or `conda`), and then installing the package `neural-admixture` there. As an example, for `virtualenv`, one should launch the following commands:

```console
> virtualenv --python=python3.9 ~/venv/nadmenv
> source ~/venv/nadmenv/bin/activate
(nadmenv) > pip3 install neural-admixture
(nadmenv) > pip install neural-admixture
```

## Installation Guide

The package can be easily installed in at most a few minutes using `pip` (make sure to add the `--upgrade` flag if updating the version):

```console
(nadmenv) > pip3 install neural-admixture
(nadmenv) > pip install neural-admixture
```

## Usage
Expand Down Expand Up @@ -113,7 +113,7 @@ For this command to work, files `./outputs/nadm_test.pt` and `./outputs/nadm_tes

As also mentioned in the paper, Neural ADMIXTURE can be used to learn a function (through the encoder) given the frequency matrix `P`/`F` produced by another algorithm, so that inference on out-of-training data can be performed using the structure learnt by that algorithm. The following arguments should be used to run Neural ADMIXTURE in this mode:

- `--initialization pretrained`: indicates than an ADMIXTURE-like `.P` file must be loaded to initialize the decoder weights. The path of the file is specified using the `--init_file` argument.
- `--initialization pretrained`: indicates that an ADMIXTURE-like `.P` file must be loaded to initialize the decoder weights. The path of the file is specified using the `--init_file` argument.
- `--freeze_decoder`: indicates that the decoder weights will be frozen during training.

If the second argument is skipped, then the decoder weights will be updated and the solution won't yield exactly the same `P`/`F` matrix that was used as input.
Expand Down
9 changes: 4 additions & 5 deletions neural_admixture/entry.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import argparse
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
log = logging.getLogger(__name__)

def main():
assert len(sys.argv) > 1, 'Please provide either the argument "train" or "infer" to choose running mode.'
assert len(sys.argv) > 2, 'Please provide either the argument "train" or "infer" to choose running mode.'
if sys.argv[1] == 'train':
from src import train
sys.exit(train.main())
sys.exit(train.main(sys.argv[2:]))
if sys.argv[1] == 'infer':
from src import inference
sys.exit(inference.main())
sys.exit(inference.main(sys.argv[2:]))
log.error(f'Invalid argument {sys.argv[1]}. Please run either "neural-admixture train" or "neural-admixture infer"')
sys.exit(1)
sys.exit(1)
7 changes: 2 additions & 5 deletions neural_admixture/src/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
log = logging.getLogger(__name__)

def main():
args = utils.parse_infer_args()
def main(argv):
args = utils.parse_infer_args(argv)
log.info('Will use GPU' if torch.cuda.is_available() else 'No GPUs available.')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
data_file_str = args.data_path
Expand Down Expand Up @@ -40,6 +40,3 @@ def main():
log.info('Exiting...')
logging.shutdown()
return 0

if __name__ == '__main__':
sys.exit(main())
6 changes: 2 additions & 4 deletions neural_admixture/src/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ def fit_model(trX, args, valX=None, trY=None, valY=None):
log.info('Optimization process finished.')
return model, device

def main():
args = utils.parse_train_args()
def main(argv):
args = utils.parse_train_args(argv)
tr_file, val_file = args.data_path, args.validation_data_path
tr_pops_f, val_pops_f = args.populations_path, args.validation_populations_path

Expand All @@ -108,5 +108,3 @@ def main():
logging.shutdown()
return 0

if __name__ == '__main__':
sys.exit(main())
19 changes: 8 additions & 11 deletions neural_admixture/src/utils.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
import argparse
import gc
import logging
import numpy as np
import os
import sys
import time
import torch
import wandb
from itertools import permutations

from src.snp_reader import SNPReader

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
log = logging.getLogger(__name__)

def parse_train_args():
parser = argparse.ArgumentParser()
parser.add_argument('mode', choices=['train', 'infer'], help='Choose between modes.')
def parse_train_args(argv):
parser = argparse.ArgumentParser(prog='neural-admixture train',
description='Rapid population clustering with autoencoders - training mode')
parser.add_argument('--learning_rate', required=False, default=0.0001, type=float, help='Learning rate')
parser.add_argument('--max_epochs', required=False, type=int, default=50, help='Maximum number of epochs')
parser.add_argument('--initialization', required=False, type=str, default = 'pckmeans',
Expand Down Expand Up @@ -48,17 +45,17 @@ def parse_train_args():
parser.add_argument('--name', required=True, type=str, help='Experiment/model name')
parser.add_argument('--batch_size', required=False, default=400, type=int, help='Batch size')
parser.add_argument('--supervised_loss_weight', required=False, default=0.05, type=float, help='Weight given to the supervised loss')
return parser.parse_args()
return parser.parse_args(argv)

def parse_infer_args():
parser = argparse.ArgumentParser()
parser.add_argument('mode', choices=['train', 'infer'], help='Choose between modes')
def parse_infer_args(argv):
parser = argparse.ArgumentParser(prog='neural-admixture infer',
description='Rapid population clustering with autoencoders - inference mode')
parser.add_argument('--out_name', required=True, type=str, help='Name used to output files on inference mode')
parser.add_argument('--save_dir', required=True, type=str, help='Load model from this directory')
parser.add_argument('--data_path', required=True, type=str, help='Path containing the main data')
parser.add_argument('--name', required=True, type=str, help='Trained experiment/model name')
parser.add_argument('--batch_size', required=False, default=400, type=int, help='Batch size')
return parser.parse_args()
return parser.parse_args(argv)

def initialize_wandb(run_name, trX, valX, args, out_path, silent=True):
if run_name is None:
Expand Down
11 changes: 6 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
Cython>=0.29.30
codetiming==1.3.0
h5py==3.1.0
h5py>=3.1.0
matplotlib==3.3.4
numpy>=1.21.0
pandas==1.2.4
pandas_plink==2.2.9
py_pcha==0.1.3
scikit-allel==1.3.5
scikit-learn==0.24.1
scikit-learn>=0.24.1
setuptools==50.3.1
torch==1.7.1
twine
wandb==0.10.21
torch>=1.7.1
wandb>=0.12.17
15 changes: 8 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name='neural-admixture',
version='1.1.2',
version='1.1.4',
long_description=(Path(__file__).parent / 'README.md').read_text(),
long_description_content_type='text/markdown',
description='Population clustering with autoencoders',
Expand All @@ -17,23 +17,24 @@
packages=find_packages('neural_admixture/')+['.'],
package_dir={"": "neural_admixture"},
python_requires=">=3.8",
install_requires=['codetiming==1.3.0',
'h5py==3.1.0',
install_requires=['Cython>=0.29.30',
'codetiming==1.3.0',
'h5py>=3.1.0',
'matplotlib==3.3.4',
'numpy>=1.21.0',
'pandas==1.2.4',
'pandas_plink==2.2.9',
'py_pcha==0.1.3',
'scikit-allel==1.3.5',
'scikit-learn==0.24.1',
'scikit-learn>=0.24.1',
'setuptools==50.3.1',
'torch==1.7.1',
'wandb==0.10.21'],
'torch>=1.7.1',
'wandb>=0.12.17'],
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Science/Research',
'Operating System :: POSIX :: Linux',
'Operating System :: MacOS',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
],
)

0 comments on commit 8f6c847

Please sign in to comment.