From 4ef26a5461f6b8d134ea66c8dcdfc6797d22f400 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 13 Jun 2024 16:14:37 +0100 Subject: [PATCH 01/31] Update metadata file in scanpy-scripts-tests.bats --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index 93af736d..b9794506 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" > $batch_obs + run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.s1\n" {1..1350})" > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ] From 47598b6b4c909de13dbfc118c60691f68894adf2 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 13 Jun 2024 20:15:53 +0100 Subject: [PATCH 02/31] changing string to int --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index b9794506..0cf9b606 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.s1\n" {1..1350})" > $batch_obs + run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(for i in {1..1350}; do printf "%d\n" $i; done;)" > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ] From feac1bd82fc44d02bf3debc4ce8e759f3c1b2dbb Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 13 Jun 2024 20:20:00 +0100 Subject: [PATCH 03/31] change int loop to print single number --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index 0cf9b606..e2fe77bc 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(for i in {1..1350}; do printf "%d\n" $i; done;)" > $batch_obs + run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(for i in {1..1350}; do printf "%d\n" 2; done;)" > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ] From 226a08dff3dc28db6b73cb32c50352148ede4255 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 13 Jun 2024 20:45:26 +0100 Subject: [PATCH 04/31] Adds empty lines --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index e2fe77bc..4920a521 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(for i in {1..1350}; do printf "%d\n" 2; done;)" > $batch_obs + run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(for i in {1..350}; do printf "\n"; done;)\n$(for i in {1..1000}; do printf "%d\n" 2; done;)" > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ] From 58239a6d332d20dab55547fdd56cec9f8556c7c4 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 13 Jun 2024 20:50:14 +0100 Subject: [PATCH 05/31] fixes empty lines --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index 4920a521..370221e4 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(for i in {1..350}; do printf "\n"; done;)\n$(for i in {1..1000}; do printf "%d\n" 2; done;)" > $batch_obs + run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n\n\n\n\n\n\n\n\n\n$(for i in {1..1340}; do printf "%d\n" 2; done;)" > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ] From a22032cbab2df16f59f8b4d65cabeac56dfa4cf5 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 14 Jun 2024 10:56:36 +0100 Subject: [PATCH 06/31] Update scanpy-scripts-tests.bats --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index 370221e4..e875660b 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n\n\n\n\n\n\n\n\n\n$(for i in {1..1340}; do printf "%d\n" 2; done;)" > $batch_obs + run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..})$(for i in {1..10}; do printf " \n"; done;)$(for i in {1..1340}; do printf "%d\n" 2; done;)" > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ] From 525008377cf3eb982da13bf4a10d8368dcf9e095 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 14 Jun 2024 15:43:04 +0100 Subject: [PATCH 07/31] adds functionality to change multiple datatype to string --- scanpy_scripts/lib/_read.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scanpy_scripts/lib/_read.py b/scanpy_scripts/lib/_read.py index 20e3ee67..5e2247e8 100644 --- a/scanpy_scripts/lib/_read.py +++ b/scanpy_scripts/lib/_read.py @@ -35,6 +35,12 @@ def read_10x( if extra_var: var_tbl = pd.read_csv(extra_var, sep="\t", header=0, index_col=0) + mixed_columns = columns_with_multiple_dtypes(var_tbl) + + # Convert mixed dtype columns to 'object' type to preserve all information + for column in mixed_columns: + var_tbl[column] = var_tbl[column].astype('string') + adata.var = adata.var.merge( var_tbl, how="left", @@ -43,3 +49,14 @@ def read_10x( suffixes=(False, False), ) return adata + + +def columns_with_multiple_dtypes(df): + mixed_dtype_columns = [] + for column in df.columns: + # Get unique dtypes in the column + unique_dtypes = df[column].apply(type).unique() + if len(unique_dtypes) > 1: + mixed_dtype_columns.append(column) + return mixed_dtype_columns + From dfb85b032d0c4d78224998ab903ddd150fa9ad6e Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 14 Jun 2024 16:07:38 +0100 Subject: [PATCH 08/31] satisfying `black` errors --- scanpy_scripts/lib/_mnn.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/scanpy_scripts/lib/_mnn.py b/scanpy_scripts/lib/_mnn.py index 84c6416f..436c7ad0 100644 --- a/scanpy_scripts/lib/_mnn.py +++ b/scanpy_scripts/lib/_mnn.py @@ -30,11 +30,7 @@ def mnn_correct(adata, key=None, key_added=None, var_subset=None, layer=None, ** batches = np.unique(adata.obs[key]) alldata = [] for batch in batches: - alldata.append( - adata[ - adata.obs[key] == batch, - ] - ) + alldata.append(adata[adata.obs[key] == batch,]) # Process var_subset into a list of strings that can be provided to # mnn_correct() From e544e16f67b29eb7e56fa93b998e8fde13b5ef24 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 14 Jun 2024 16:08:32 +0100 Subject: [PATCH 09/31] Satisfying `black` error --- scanpy_scripts/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scanpy_scripts/__init__.py b/scanpy_scripts/__init__.py index 2f222bbc..86f1bc48 100644 --- a/scanpy_scripts/__init__.py +++ b/scanpy_scripts/__init__.py @@ -1,6 +1,7 @@ """ Provides version, author and exports """ + import importlib.metadata __version__ = importlib.metadata.version("scanpy-scripts") From f95d276a0ccc44472f2f77200409f04907980f1b Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 14 Jun 2024 16:16:08 +0100 Subject: [PATCH 10/31] Fixes `black` error --- scanpy_scripts/lib/_read.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scanpy_scripts/lib/_read.py b/scanpy_scripts/lib/_read.py index 5e2247e8..7b9d3e99 100644 --- a/scanpy_scripts/lib/_read.py +++ b/scanpy_scripts/lib/_read.py @@ -37,10 +37,9 @@ def read_10x( var_tbl = pd.read_csv(extra_var, sep="\t", header=0, index_col=0) mixed_columns = columns_with_multiple_dtypes(var_tbl) - # Convert mixed dtype columns to 'object' type to preserve all information + # Convert mixed dtype columns to 'object' type to preserve all information for column in mixed_columns: - var_tbl[column] = var_tbl[column].astype('string') - + var_tbl[column] = var_tbl[column].astype('string') adata.var = adata.var.merge( var_tbl, how="left", @@ -50,7 +49,6 @@ def read_10x( ) return adata - def columns_with_multiple_dtypes(df): mixed_dtype_columns = [] for column in df.columns: @@ -59,4 +57,3 @@ def columns_with_multiple_dtypes(df): if len(unique_dtypes) > 1: mixed_dtype_columns.append(column) return mixed_dtype_columns - From 37748c1199e99d0de13a2a226e0d5456fd58a584 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 14 Jun 2024 16:19:48 +0100 Subject: [PATCH 11/31] Update `black` operation in python-package.yml --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2e9db02f..63b9a8eb 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -29,7 +29,7 @@ jobs: - name: Run black manually run: | - black --check --verbose ./ + black --diff --verbose ./ # - name: Install dependencies # run: | From fb313c931db4f06ca7a94dad96f504c04da8b4ad Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 14 Jun 2024 16:28:00 +0100 Subject: [PATCH 12/31] reverting changes in scanpy-scripts-tests.bats --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index e875660b..93af736d 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..})$(for i in {1..10}; do printf " \n"; done;)$(for i in {1..1340}; do printf "%d\n" 2; done;)" > $batch_obs + run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ] From a47de2c569d58b4b0316b8780f8aaa4ed7b3b065 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 4 Jul 2024 16:19:47 +0100 Subject: [PATCH 13/31] changes var type and moves code to right column --- scanpy_scripts/lib/_read.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scanpy_scripts/lib/_read.py b/scanpy_scripts/lib/_read.py index 7b9d3e99..f1d1cee8 100644 --- a/scanpy_scripts/lib/_read.py +++ b/scanpy_scripts/lib/_read.py @@ -25,6 +25,12 @@ def read_10x( if extra_obs: obs_tbl = pd.read_csv(extra_obs, sep="\t", header=0, index_col=0) + mixed_columns = columns_with_multiple_dtypes(obs_tbl) + + # Convert mixed dtype columns to 'object' type to preserve all information + for column in mixed_columns: + obs_tbl[column] = obs_tbl[column].astype('str') + adata.obs = adata.obs.merge( obs_tbl, how="left", @@ -35,11 +41,7 @@ def read_10x( if extra_var: var_tbl = pd.read_csv(extra_var, sep="\t", header=0, index_col=0) - mixed_columns = columns_with_multiple_dtypes(var_tbl) - # Convert mixed dtype columns to 'object' type to preserve all information - for column in mixed_columns: - var_tbl[column] = var_tbl[column].astype('string') adata.var = adata.var.merge( var_tbl, how="left", From 6e3e0cc8161a60bb627010db4c80ee40911afe0f Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 4 Jul 2024 16:27:01 +0100 Subject: [PATCH 14/31] reverting changes --- scanpy_scripts/lib/_mnn.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scanpy_scripts/lib/_mnn.py b/scanpy_scripts/lib/_mnn.py index 436c7ad0..9dcfc742 100644 --- a/scanpy_scripts/lib/_mnn.py +++ b/scanpy_scripts/lib/_mnn.py @@ -30,7 +30,11 @@ def mnn_correct(adata, key=None, key_added=None, var_subset=None, layer=None, ** batches = np.unique(adata.obs[key]) alldata = [] for batch in batches: - alldata.append(adata[adata.obs[key] == batch,]) + alldata.append( + adata[ + adata.obs[key] == batch, + ] + ) # Process var_subset into a list of strings that can be provided to # mnn_correct() From 366ca6a469a1e6a3612d36f350bd6f50bda3d84b Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 4 Jul 2024 16:27:38 +0100 Subject: [PATCH 15/31] Update _mnn.py --- scanpy_scripts/lib/_mnn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scanpy_scripts/lib/_mnn.py b/scanpy_scripts/lib/_mnn.py index 9dcfc742..84c6416f 100644 --- a/scanpy_scripts/lib/_mnn.py +++ b/scanpy_scripts/lib/_mnn.py @@ -31,10 +31,10 @@ def mnn_correct(adata, key=None, key_added=None, var_subset=None, layer=None, ** alldata = [] for batch in batches: alldata.append( - adata[ - adata.obs[key] == batch, - ] - ) + adata[ + adata.obs[key] == batch, + ] + ) # Process var_subset into a list of strings that can be provided to # mnn_correct() From 192edbcbcfc1f5a787964854d64769c6a9096e5c Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 5 Jul 2024 08:45:29 +0100 Subject: [PATCH 16/31] fixes anndata version --- test-env.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/test-env.yaml b/test-env.yaml index 83b06361..11711f78 100644 --- a/test-env.yaml +++ b/test-env.yaml @@ -13,6 +13,7 @@ dependencies: - harmonypy>=0.0.5 - bbknn>=1.5.0,<1.6.0 - mnnpy>=0.1.9.5 + - anndata=0.10.5.post1 # for mnnpy using n_jobs - scipy <1.9.0 - scikit-learn <1.3.0 From b70d1308b49bb0407dd1c884c32165ca3f7a6a12 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 5 Jul 2024 08:47:01 +0100 Subject: [PATCH 17/31] Update __init__.py --- scanpy_scripts/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scanpy_scripts/__init__.py b/scanpy_scripts/__init__.py index 86f1bc48..2f222bbc 100644 --- a/scanpy_scripts/__init__.py +++ b/scanpy_scripts/__init__.py @@ -1,7 +1,6 @@ """ Provides version, author and exports """ - import importlib.metadata __version__ = importlib.metadata.version("scanpy-scripts") From d4d7e3571d21902980905c45964f4ffe54af1f17 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 5 Jul 2024 08:53:44 +0100 Subject: [PATCH 18/31] removes Anndata --- test-env.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/test-env.yaml b/test-env.yaml index 11711f78..83b06361 100644 --- a/test-env.yaml +++ b/test-env.yaml @@ -13,7 +13,6 @@ dependencies: - harmonypy>=0.0.5 - bbknn>=1.5.0,<1.6.0 - mnnpy>=0.1.9.5 - - anndata=0.10.5.post1 # for mnnpy using n_jobs - scipy <1.9.0 - scikit-learn <1.3.0 From e0a20acbc9154ce7bdca955528de67885a198736 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 5 Jul 2024 09:01:19 +0100 Subject: [PATCH 19/31] fixes lint --- scanpy_scripts/lib/_read.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scanpy_scripts/lib/_read.py b/scanpy_scripts/lib/_read.py index f1d1cee8..08fb5bbc 100644 --- a/scanpy_scripts/lib/_read.py +++ b/scanpy_scripts/lib/_read.py @@ -27,10 +27,10 @@ def read_10x( obs_tbl = pd.read_csv(extra_obs, sep="\t", header=0, index_col=0) mixed_columns = columns_with_multiple_dtypes(obs_tbl) - # Convert mixed dtype columns to 'object' type to preserve all information + # Convert mixed dtype columns to 'object' type to preserve all information for column in mixed_columns: - obs_tbl[column] = obs_tbl[column].astype('str') - + obs_tbl[column] = obs_tbl[column].astype('str') + adata.obs = adata.obs.merge( obs_tbl, how="left", @@ -51,6 +51,7 @@ def read_10x( ) return adata + def columns_with_multiple_dtypes(df): mixed_dtype_columns = [] for column in df.columns: From 88ea25476faa29b865bf40898d9ffe91c70f3a09 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 5 Jul 2024 09:06:26 +0100 Subject: [PATCH 20/31] fixes `black` test --- scanpy_scripts/lib/_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy_scripts/lib/_read.py b/scanpy_scripts/lib/_read.py index 08fb5bbc..49ad6978 100644 --- a/scanpy_scripts/lib/_read.py +++ b/scanpy_scripts/lib/_read.py @@ -29,7 +29,7 @@ def read_10x( # Convert mixed dtype columns to 'object' type to preserve all information for column in mixed_columns: - obs_tbl[column] = obs_tbl[column].astype('str') + obs_tbl[column] = obs_tbl[column].astype("str") adata.obs = adata.obs.merge( obs_tbl, From 3a0ebf50272409e49f6cfcb7de2f79e968b6e930 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 5 Jul 2024 09:15:04 +0100 Subject: [PATCH 21/31] reverting `black` command --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 63b9a8eb..2e9db02f 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -29,7 +29,7 @@ jobs: - name: Run black manually run: | - black --diff --verbose ./ + black --check --verbose ./ # - name: Install dependencies # run: | From 024d3bf6f3b0800c72a9ff40fe29eccf4d627709 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 5 Jul 2024 09:58:58 +0100 Subject: [PATCH 22/31] applying mixed column test --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index 93af736d..07b507c1 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" > $batch_obs + run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" | awk '{if (NR == 1) {print $0; next} if (NR % 100 == 0) {print NR / 100; next} if (NR % 250 == 0) {print ""; next} print $0}' > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ] From 99c9f10e7e400e481b80035c32776c2e49009769 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Fri, 5 Jul 2024 10:06:00 +0100 Subject: [PATCH 23/31] revert scanpy-scripts-tests.bats --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index 07b507c1..93af736d 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" | awk '{if (NR == 1) {print $0; next} if (NR % 100 == 0) {print NR / 100; next} if (NR % 250 == 0) {print ""; next} print $0}' > $batch_obs + run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ] From 0db82249c103740c63019f18cfcd8e6801099915 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 22 Aug 2024 10:32:27 +0100 Subject: [PATCH 24/31] Update _read.py - fixes mixed data types in anndata --- scanpy_scripts/lib/_read.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/scanpy_scripts/lib/_read.py b/scanpy_scripts/lib/_read.py index 49ad6978..3f6f9bb9 100644 --- a/scanpy_scripts/lib/_read.py +++ b/scanpy_scripts/lib/_read.py @@ -25,11 +25,6 @@ def read_10x( if extra_obs: obs_tbl = pd.read_csv(extra_obs, sep="\t", header=0, index_col=0) - mixed_columns = columns_with_multiple_dtypes(obs_tbl) - - # Convert mixed dtype columns to 'object' type to preserve all information - for column in mixed_columns: - obs_tbl[column] = obs_tbl[column].astype("str") adata.obs = adata.obs.merge( obs_tbl, @@ -49,6 +44,18 @@ def read_10x( right_index=True, suffixes=(False, False), ) + + # Convert mixed dtype columns to 'object' type to preserve all information + obs_mixed_columns = columns_with_multiple_dtypes(adata.obs) + + for column in obs_mixed_columns: + adata.obs[column] = adata.obs[column].astype("str") + + var_mixed_columns = columns_with_multiple_dtypes(adata.var) + + for column in var_mixed_columns: + adata.var[column] = adata.var[column].astype("str") + return adata From 4cacb3996634edb59660567f7d3fcae118b7231f Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 22 Aug 2024 10:33:14 +0100 Subject: [PATCH 25/31] Update _read.py - removes empty lines --- scanpy_scripts/lib/_read.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scanpy_scripts/lib/_read.py b/scanpy_scripts/lib/_read.py index 3f6f9bb9..1d4f4695 100644 --- a/scanpy_scripts/lib/_read.py +++ b/scanpy_scripts/lib/_read.py @@ -25,7 +25,6 @@ def read_10x( if extra_obs: obs_tbl = pd.read_csv(extra_obs, sep="\t", header=0, index_col=0) - adata.obs = adata.obs.merge( obs_tbl, how="left", @@ -36,7 +35,6 @@ def read_10x( if extra_var: var_tbl = pd.read_csv(extra_var, sep="\t", header=0, index_col=0) - adata.var = adata.var.merge( var_tbl, how="left", From 2b0ceed97edebc7ad01bef8b9f8bab5331d11dbd Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 22 Aug 2024 10:33:31 +0100 Subject: [PATCH 26/31] Update _read.py --- scanpy_scripts/lib/_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy_scripts/lib/_read.py b/scanpy_scripts/lib/_read.py index 1d4f4695..7225c671 100644 --- a/scanpy_scripts/lib/_read.py +++ b/scanpy_scripts/lib/_read.py @@ -43,7 +43,7 @@ def read_10x( suffixes=(False, False), ) - # Convert mixed dtype columns to 'object' type to preserve all information + # Convert mixed dtype columns to 'string' type to preserve all information obs_mixed_columns = columns_with_multiple_dtypes(adata.obs) for column in obs_mixed_columns: From 6b28282524d8d1195fd040411cc4c9a36621869f Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 22 Aug 2024 10:37:22 +0100 Subject: [PATCH 27/31] Update python-package.yml - temporary update black to check errs --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2e9db02f..63b9a8eb 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -29,7 +29,7 @@ jobs: - name: Run black manually run: | - black --check --verbose ./ + black --diff --verbose ./ # - name: Install dependencies # run: | From 4683fcdb894f14b13c69d5dfd2b6db5b071ab57b Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 22 Aug 2024 10:41:21 +0100 Subject: [PATCH 28/31] Update _read.py - fixes black test --- scanpy_scripts/lib/_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy_scripts/lib/_read.py b/scanpy_scripts/lib/_read.py index 7225c671..0315d5d5 100644 --- a/scanpy_scripts/lib/_read.py +++ b/scanpy_scripts/lib/_read.py @@ -53,7 +53,7 @@ def read_10x( for column in var_mixed_columns: adata.var[column] = adata.var[column].astype("str") - + return adata From 193b8d4aa21f44a2772823ab66b4af77757df0bc Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 22 Aug 2024 10:49:45 +0100 Subject: [PATCH 29/31] Update python-package.yml - reverting black test --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 63b9a8eb..2e9db02f 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -29,7 +29,7 @@ jobs: - name: Run black manually run: | - black --diff --verbose ./ + black --check --verbose ./ # - name: Install dependencies # run: | From 609144fc7961bc1f78631ec9954b5c6d0ce4ba72 Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 22 Aug 2024 11:18:27 +0100 Subject: [PATCH 30/31] Update scanpy-scripts-tests.bats - to test anndata changes --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index 93af736d..07b507c1 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" > $batch_obs + run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" | awk '{if (NR == 1) {print $0; next} if (NR % 100 == 0) {print NR / 100; next} if (NR % 250 == 0) {print ""; next} print $0}' > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ] From 943ca3ccb5099dbe782200d81ff3dfedaf891afe Mon Sep 17 00:00:00 2001 From: Anil Thanki Date: Thu, 22 Aug 2024 12:36:13 +0100 Subject: [PATCH 31/31] Update scanpy-scripts-tests.bats - adds additional column for test --- scanpy-scripts-tests.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats index 07b507c1..86e1d3f2 100755 --- a/scanpy-scripts-tests.bats +++ b/scanpy-scripts-tests.bats @@ -180,7 +180,7 @@ setup() { skip "$singlet_obs exists" fi - run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" | awk '{if (NR == 1) {print $0; next} if (NR % 100 == 0) {print NR / 100; next} if (NR % 250 == 0) {print ""; next} print $0}' > $batch_obs + run rm -rf $batch_obs && echo -e "batch\tadditional_column\n$(for i in {1..1350}; do echo -e "batch1\tdata$i"; done)\n$(for i in {1..1350}; do echo -e "batch2\tinfo$i"; done)" > $batch_obs [ "$status" -eq 0 ] [ -f "$batch_obs" ]