add tests/conftest.py

jsxlei · May 2, 2024 · 003504e · 003504e
1 parent b3e6fae
commit 003504e
Show file tree

Hide file tree

Showing 5 changed files with 136 additions and 50 deletions.
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -0,0 +1,67 @@
+# CI workflow: installs the package with its dev/test extras and runs the
+# pytest suite under coverage on each configuration of the matrix below.
+name: Test
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+  # Also run at 05:00 on the 1st and 15th of every month, so breakage caused by
+  # new dependency releases shows up even when nothing is pushed.
+  schedule:
+    - cron: "0 5 1,15 * *"
+
+# One active run per workflow and ref: a newer run cancels the one in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    defaults:
+      run:
+        shell: bash -e {0} # -e to fail on error
+
+    strategy:
+      # Let the remaining matrix entries finish when one of them fails.
+      fail-fast: false
+      matrix:
+        include:
+          - os: ubuntu-latest
+            python: "3.9"
+          - os: ubuntu-latest
+            python: "3.11"
+          # Same interpreter as above, but pip is allowed to pick pre-release
+          # versions of the dependencies (see the "Install dependencies" step).
+          - os: ubuntu-latest
+            python: "3.11"
+            pip-flags: "--pre"
+            name: PRE-RELEASE DEPENDENCIES
+
+    # matrix.name is only defined for the pre-release entry; for the other
+    # entries the expression expands to an empty string.
+    name: ${{ matrix.name }} Python ${{ matrix.python }}
+
+    env:
+      OS: ${{ matrix.os }}
+      PYTHON: ${{ matrix.python }}
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python }}
+          cache: "pip"
+          cache-dependency-path: "**/pyproject.toml"
+
+      # NOTE(review): despite its name this step only upgrades pip and wheel;
+      # the test dependencies are installed by the next step via the extras.
+      - name: Install test dependencies
+        run: |
+          python -m pip install --upgrade pip wheel
+      - name: Install dependencies
+        run: |
+          pip install ${{ matrix.pip-flags }} ".[dev,test]"
+      - name: Test
+        env:
+          # agg is Matplotlib's non-interactive backend, so no display is needed.
+          MPLBACKEND: agg
+          PLATFORM: ${{ matrix.os }}
+          DISPLAY: :42
+        run: |
+          coverage run -m pytest -v --color=yes
+      - name: Report coverage
+        run: |
+          coverage report
+      - name: Upload coverage
+        uses: codecov/codecov-action@v3
diff --git a/scalex/data.py b/scalex/data.py
@@ -228,6 +228,8 @@ def preprocessing_rna(
     if min_features is None: min_features = 600
     if n_top_features is None: n_top_features = 2000
     if target_sum is None: target_sum = 10000
+
+    # adata.layers['count'] = adata.X.copy()
 
     if log: log.info('Preprocessing')
     # if not issparse(adata.X):
@@ -251,7 +253,8 @@ def preprocessing_rna(
     if log: log.info('Log1p transforming')
     sc.pp.log1p(adata)
 
-    adata.raw = adata
+    adata.raw = adata # keep the normalized and log1p transformed data as raw gene expression for differential expression analysis
+
     if log: log.info('Finding variable features')
     if type(n_top_features) == int and n_top_features>0:
         sc.pp.highly_variable_genes(adata, n_top_genes=n_top_features, batch_key='batch') #, inplace=False, subset=True)
@@ -639,7 +642,9 @@ def load_data(
             log=log,
         )
     else:
-        if use_layer in adata.layers:
+        if use_layer == 'X':
+            adata.X = MaxAbsScaler().fit_transform(adata.X)
+        elif use_layer in adata.layers:
             adata.layers[use_layer] = MaxAbsScaler().fit_transform(adata.layers[use_layer])
         elif use_layer in adata.obsm:
             adata.obsm[use_layer] = MaxAbsScaler().fit_transform(adata.obsm[use_layer])

diff --git a/scalex/function.py b/scalex/function.py
@@ -389,6 +389,7 @@ def main():
         num_workers=args.num_workers,
         show=False,
     )
+
 
 if __name__ == '__main__':
     main()
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,43 @@
+import pytest
+import anndata as ad
+import numpy as np
+import pandas as pd
+
+np.random.seed(42)
+
+@pytest.fixture(scope='session')
+def adata_test(n_obs=16, n_vars=32, n_categories=3):
+    """
+    Creates a virtual AnnData object with random binary data for testing.
+
+    Parameters:
+    - n_obs (int): Number of observations (cells)
+    - n_vars (int): Number of variables (genes)
+    - n_categories (int): Number of categories for the categorical annotation
+
+    Returns:
+    - An AnnData object populated with binary data and annotations.
+    """
+    # Generate random binary data
+    X = np.random.randint(0, 2, size=(n_obs, n_vars))  # Random binary matrix (0s and 1s)
+
+    # Generate observation names and variable names
+    obs_names = [f"Cell_{i}" for i in range(n_obs)]
+    var_names = [f"Gene_{j}" for j in range(n_vars)]
+
+    # Create observation (cell) metadata
+    obs = pd.DataFrame({
+        'condition': np.random.choice([f"Condition_{i}" for i in range(1, n_categories + 1)], n_obs),
+        'batch': np.random.choice([i for i in range(1, n_categories + 1)], n_obs)
+    }, index=obs_names)
+
+    # Create variable (gene) metadata
+    var = pd.DataFrame({
+        'Gene_ID': var_names
+    }, index=var_names)
+
+    # Create AnnData object
+    adata = ad.AnnData(X=X, obs=obs, var=var)
+    adata.obs['batch'] = adata.obs['batch'].astype('category')
+
+    return adata
diff --git a/tests/test_scalex.py b/tests/test_scalex.py
@@ -1,57 +1,19 @@
 import pytest
 
 import scalex
+from scalex.function import SCALEX
+from scalex.net.vae import VAE
+from scalex.data import preprocessing_rna
 
-import anndata as ad
-import numpy as np
-import pandas as pd
 import torch
 
-def create_virtual_anndata(n_obs=16, n_vars=32, n_categories=3):
-    """
-    Creates a virtual AnnData object with random binary data for testing.
+def test_preprocess_rna(adata_test):
+    """Check that preprocessing_rna leaves ``.raw`` with the same shape as the input."""
+    # Work on a copy so the fixture object handed to this test is not modified.
+    adata = adata_test.copy()
+    adata = preprocessing_rna(adata, min_cells=4, min_features=0)
+    assert adata.raw.shape == adata_test.shape
 
-    Parameters:
-    - n_obs (int): Number of observations (cells)
-    - n_vars (int): Number of variables (genes)
-    - n_categories (int): Number of categories for the categorical annotation
 
-    Returns:
-    - An AnnData object populated with binary data and annotations.
-    """
-    # Generate random binary data
-    X = np.random.randint(0, 2, size=(n_obs, n_vars))  # Random binary matrix (0s and 1s)
-
-    # Generate observation names and variable names
-    obs_names = [f"Cell_{i}" for i in range(n_obs)]
-    var_names = [f"Gene_{j}" for j in range(n_vars)]
-
-    # Create observation (cell) metadata
-    obs = pd.DataFrame({
-        'condition': np.random.choice([f"Condition_{i}" for i in range(1, n_categories + 1)], n_obs),
-        'batch': np.random.choice([i for i in range(1, n_categories + 1)], n_obs)
-    }, index=obs_names)
-
-    # Create variable (gene) metadata
-    var = pd.DataFrame({
-        'Gene_ID': var_names
-    }, index=var_names)
-
-    # Create AnnData object
-    adata = ad.AnnData(X=X, obs=obs, var=var)
-
-    return adata
-
-
-
-
-def test_scalex():
-    import scanpy as sc
-    from scalex.function import SCALEX
-    from scalex.net.vae import VAE
-
-    # Create a virtual AnnData object with binary data
-    adata_test = create_virtual_anndata()
+def test_scalex_forward(adata_test):
     n_domain = len(adata_test.obs['batch'].astype('category').cat.categories)
     x_dim = adata_test.X.shape[1]
 
@@ -69,6 +31,14 @@ def test_scalex():
 
     # Load the file
 
+
+def test_full_model(adata_test):
+    """Run SCALEX on the test data and check the neighbour graph and UMAP outputs exist."""
+    # NOTE(review): adata_test is passed without copying; if SCALEX modifies its
+    # input in place, the object seen by other tests changes too - confirm.
+    out = SCALEX(
+        adata_test, processed=True, min_cells=0, min_features=0, batch_size=2, max_iteration=10,
+    )
+    assert 'distances' in out.obsp
+    assert 'X_scalex_umap' in out.obsm
+
+
 if __name__ == '__main__':
-    test_scalex()
-
+    pytest.main([__file__])