diff --git a/kb_python/bins/darwin/bustools/bustools b/kb_python/bins/darwin/bustools/bustools index 0a4217d..1c788fc 100755 Binary files a/kb_python/bins/darwin/bustools/bustools and b/kb_python/bins/darwin/bustools/bustools differ diff --git a/kb_python/bins/darwin/m1/bustools/bustools b/kb_python/bins/darwin/m1/bustools/bustools index 0a4217d..1c788fc 100755 Binary files a/kb_python/bins/darwin/m1/bustools/bustools and b/kb_python/bins/darwin/m1/bustools/bustools differ diff --git a/kb_python/bins/linux/bustools/bustools b/kb_python/bins/linux/bustools/bustools index d8ec9f8..7f7ec41 100755 Binary files a/kb_python/bins/linux/bustools/bustools and b/kb_python/bins/linux/bustools/bustools differ diff --git a/kb_python/bins/windows/bustools/bustools.exe b/kb_python/bins/windows/bustools/bustools.exe index f07364e..28b7e81 100755 Binary files a/kb_python/bins/windows/bustools/bustools.exe and b/kb_python/bins/windows/bustools/bustools.exe differ diff --git a/kb_python/count.py b/kb_python/count.py index f82b723..3cdc449 100755 --- a/kb_python/count.py +++ b/kb_python/count.py @@ -204,6 +204,8 @@ def kallisto_bus( if technology.upper() in ('BULK', 'SMARTSEQ3'): results['saved_index'] = os.path.join(out_dir, SAVED_INDEX_FILENAME) + if os.path.exists(results['saved_index']): + os.remove(results['saved_index']) # TODO: Fix this in kallisto? return results diff --git a/kb_python/main.py b/kb_python/main.py index 9223133..9c7d776 100755 --- a/kb_python/main.py +++ b/kb_python/main.py @@ -636,7 +636,7 @@ def parse_count( parser.error( f'Technology `{args.x}` can not be used with workflow {args.workflow}.' ) - if args.sum is not None: + if args.sum != "none": parser.error('--sum incompatible with lamanno/nucleus') if args.x.upper() == 'SMARTSEQ3': from .count import count_velocity_smartseq3 @@ -1036,8 +1036,8 @@ def setup_ref_args( '--workflow', metavar='{standard,nac,kite,custom}', help=( - 'Type of workflow to prepare files for. ' - 'Use `nac` for RNA velocity or single-nucleus RNA-seq reads. ' + 'The type of index to create. ' + 'Use `nac` for an index type that can quantify nascent and mature RNA. ' 'Use `custom` for indexing targets directly. ' 'Use `kite` for feature barcoding. (default: standard)' ), @@ -1253,7 +1253,7 @@ def setup_count_args( metavar='{standard,nac,kite,kite:10xFB}', help=( 'Type of workflow. ' - 'Use `nac` for RNA velocity or single-nucleus RNA-seq reads. ' + 'Use `nac` to specify a nac index for producing mature/nascent/ambiguous matrices. ' 'Use `kite` for feature barcoding. ' 'Use `kite:10xFB` for 10x Genomics Feature Barcoding technology. ' '(default: standard)' @@ -1301,14 +1301,14 @@ def setup_count_args( required_nac.add_argument( '-c1', metavar='T2C', - help='Path to cDNA transcripts-to-capture', + help='Path to mature transcripts-to-capture', type=str, required=workflow in {'nac'} ) required_nac.add_argument( '-c2', metavar='T2C', - help='Path to intron transcripts-to-captured', + help='Path to nascent transcripts-to-captured', type=str, required=workflow in {'nac'} ) diff --git a/kb_python/ref.py b/kb_python/ref.py index 9b71419..783bd99 100755 --- a/kb_python/ref.py +++ b/kb_python/ref.py @@ -282,7 +282,12 @@ def kallisto_index( command += ['--d-list-overhang', dlist_overhang] if temp_dir != 'tmp': command += ['-T', temp_dir] - command += [fasta_path] + if ',' in fasta_path: + fasta_paths = fasta_path.split(',') + for fp in fasta_paths: + command += [fp] + else: + command += [fasta_path] run_executable(command) return {'index': index_path} @@ -303,11 +308,15 @@ def get_dlist_fasta(fasta_path: str = None, temp_dir: str = 'tmp') -> str: if "://" not in fasta_path: # Not a URL return fasta_path new_fasta_path = get_temporary_filename(temp_dir) + fasta_path_array = [fasta_path] + if fasta_path.count("://") > 1: + fasta_path_array = fasta_path.split(",") logger.info(f'Extracting {fasta_path} into {new_fasta_path}') - with ngs.fasta.Fasta(fasta_path, 'r') as f_in: - with ngs.fasta.Fasta(new_fasta_path, 'w') as f_out: - for entry in f_in: - f_out.write(entry) + with ngs.fasta.Fasta(new_fasta_path, 'w') as f_out: + for fp in fasta_path_array: + with ngs.fasta.Fasta(fp, 'r') as f_in: + for entry in f_in: + f_out.write(entry) return new_fasta_path @@ -803,7 +812,7 @@ def ref_custom( if not glob.glob(f'{index_path}*') or overwrite: index_result = kallisto_index( - ' '.join(fasta_paths), + ','.join(fasta_paths), index_path, k=k or 31, threads=threads, diff --git a/kb_python/utils.py b/kb_python/utils.py index 59be19d..fec20e3 100755 --- a/kb_python/utils.py +++ b/kb_python/utils.py @@ -711,8 +711,8 @@ def overlay_anndatas( spliced_intersection = adata_spliced[obs_idx][:, var_idx] unspliced_intersection = adata_unspliced[obs_idx][:, var_idx] a_layers = { - 'spliced': spliced_intersection.X, - 'unspliced': unspliced_intersection.X + 'mature': spliced_intersection.X, + 'nascent': unspliced_intersection.X } ambiguous_intersection = None if adata_ambiguous is not None: diff --git a/tests/test_utils.py b/tests/test_utils.py index cf5f9d3..08dfcfd 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -227,7 +227,7 @@ def test_overlay_anndatas(self): self.unspliced_genes_path ) adata = utils.overlay_anndatas(adata_spliced, adata_unspliced) - self.assertEqual({'spliced', 'unspliced'}, set(adata.layers.keys())) + self.assertEqual({'mature', 'nascent'}, set(adata.layers.keys())) def test_sum_anndatas(self): adata_spliced = utils.import_matrix_as_anndata(