add flake8 pre-commit #1689

Merged
merged 87 commits on Mar 18, 2021

Changes from 15 commits

Commits (87)
40dc2c3
add flake8 pre-commit
Zethson Feb 24, 2021
55737d9
fix pre-commit
Zethson Feb 24, 2021
f653e5a
add E402 to flake8 ignore
Zethson Feb 24, 2021
daf03c9
revert neighbors
Zethson Feb 24, 2021
9a53065
Merge branch 'master' into feature/flake8
Zethson Feb 24, 2021
2b79a88
fix flake8
Zethson Feb 24, 2021
617168f
address review
Zethson Feb 25, 2021
ae43e3d
fix comment character in .flake8
Zethson Feb 25, 2021
7db4e60
fix test
Zethson Feb 25, 2021
48f0648
black
Zethson Feb 25, 2021
e742c66
review round 2
Zethson Feb 25, 2021
a5b1290
review round 3
Zethson Feb 25, 2021
718a06c
readded double comments
Zethson Feb 25, 2021
2a0a19d
Ignoring E262 & reverted comment
Zethson Feb 25, 2021
ebb2b01
using self for obs_tidy
Zethson Feb 25, 2021
d2bb2a9
Restore setup.py
flying-sheep Mar 1, 2021
ecc47a2
rm call of black test (#1690)
Koncopd Feb 24, 2021
f338863
Fix print_versions for python<3.8 (#1691)
ivirshup Feb 25, 2021
ce68cd1
add codecov so we can have a badge to point to (#1693)
ivirshup Feb 25, 2021
b5cc4b6
Attempt server-side search (#1672)
ivirshup Feb 25, 2021
8b0d8f0
Fix paga_path (#1047)
flying-sheep Mar 1, 2021
24d1b2e
Switch to flit
flying-sheep Dec 3, 2020
364f320
add setup.py while leaving it ignored
flying-sheep Jan 15, 2021
8f4f87e
Update install instructions
flying-sheep Jan 14, 2021
d4f7d4c
Circumvent new pip check (see pypa/pip#9628)
flying-sheep Feb 11, 2021
3db4814
Go back to regular pip (#1702)
flying-sheep Mar 2, 2021
6a97d73
codecov comment (#1704)
ivirshup Mar 2, 2021
47af631
Use joblib for parallelism in regress_out (#1695)
ivirshup Mar 3, 2021
6d36c6b
Add sparsificiation step before sparse-dependent Scrublet calls (#1707)
pinin4fjords Mar 3, 2021
c7bd6dc
Fix version on Travis (#1713)
flying-sheep Mar 3, 2021
4eb64c2
`sc.metrics` module (add confusion matrix & Geary's C methods) (#915)
ivirshup Mar 4, 2021
c11c486
Fix clipped images in docs (#1717)
ivirshup Mar 4, 2021
f637c08
Cleanup normalize_total (#1667)
ivirshup Mar 5, 2021
1e814cb
deprecate scvi (#1703)
mjayasur Mar 9, 2021
056d183
updated ecosystem.rst to add triku (#1722)
alexmascension Mar 9, 2021
ade2975
Minor addition to contributing docs (#1726)
ivirshup Mar 10, 2021
5f7f01f
Preserve category order when groupby is a list (#1735)
gokceneraslan Mar 11, 2021
b90e730
Asymmetrical diverging colormaps and vcenter (#1551)
gokceneraslan Mar 14, 2021
8fe2897
add flake8 pre-commit
Zethson Feb 24, 2021
5a144a3
add E402 to flake8 ignore
Zethson Feb 24, 2021
55aee90
revert neighbors
Zethson Feb 24, 2021
fc9d2b6
address review
Zethson Feb 25, 2021
893a034
black
Zethson Feb 25, 2021
53948bd
using self for obs_tidy
Zethson Feb 25, 2021
95958ff
rebased
Zethson Mar 15, 2021
99e1218
rebasing
Zethson Mar 15, 2021
e030ab1
rebasing
Zethson Mar 15, 2021
38e5624
rebasing
Zethson Mar 15, 2021
9bd1f0f
Merge branch 'master' into feature/flake8
Zethson Mar 15, 2021
7529cd3
add flake8 to dev docs
Zethson Mar 15, 2021
c7b9ee4
add autopep8 to pre-commits
Zethson Mar 15, 2021
ad38870
add flake8 ignore docs
Zethson Mar 15, 2021
c968244
add exception todos
Zethson Mar 15, 2021
83e31cf
add ignore directories
Zethson Mar 15, 2021
f8b6b70
reinstated lambdas
Zethson Mar 15, 2021
9e6722a
fix tests
Zethson Mar 15, 2021
207f650
fix tests
Zethson Mar 15, 2021
7fa610e
fix tests
Zethson Mar 15, 2021
976d825
fix tests
Zethson Mar 15, 2021
e3d916c
fix tests
Zethson Mar 15, 2021
5ca8527
Add E741 to allowed flake8 violations.
Zethson Mar 16, 2021
c8b7273
Add F811 flake8 ignore for tests
Zethson Mar 16, 2021
9abc967
Fix mask comparison
Zethson Mar 16, 2021
3a83228
Fix mask comparison
Zethson Mar 16, 2021
e2a4ce7
fix flake8 config file
Zethson Mar 16, 2021
0c69d81
readded autopep8
Zethson Mar 16, 2021
d89105f
import Literal
Zethson Mar 16, 2021
5cdfa9d
revert literal import
Zethson Mar 16, 2021
da412fc
fix scatterplot pca import
Zethson Mar 16, 2021
220ac15
false comparison & unused vars
Zethson Mar 16, 2021
f373a70
Add cleaner level determination
Zethson Mar 16, 2021
5adcfae
Fix comment formatting
Zethson Mar 16, 2021
ce2fb44
Add smoother dev documentation
Zethson Mar 16, 2021
8d7e6e4
fix flake8
Zethson Mar 16, 2021
64f6d7a
Readd long comment
Zethson Mar 16, 2021
32dcf96
Assuming X as array like
Zethson Mar 16, 2021
07cab3d
fix flake8
Zethson Mar 16, 2021
699aaac
fix flake8 config
Zethson Mar 16, 2021
79619ce
reverted rank_genes
Zethson Mar 16, 2021
99a8f2e
fix disp_mean_bin formatting
Zethson Mar 16, 2021
abe0846
fix formatting
Zethson Mar 16, 2021
16a0394
add final todos
Zethson Mar 16, 2021
46f4ca7
boolean checks with is
Zethson Mar 17, 2021
ad418d8
_dpt formatting
Zethson Mar 17, 2021
10e5d76
literal fixes
Zethson Mar 17, 2021
9b1da8c
links to leafs
Zethson Mar 17, 2021
c372f0b
revert paga variable naming
ivirshup Mar 18, 2021
5 changes: 5 additions & 0 deletions .flake8
@@ -0,0 +1,5 @@
# Can't yet be moved to the pyproject.toml due to https://gitlab.com/pycqa/flake8/-/issues/428#note_251982786
[flake8]
max-line-length = 88
# switched off since they conflict with black's standards
ignore = F401, W503, E501, E203, E231, W504, E402, E126, E712, E741, E266, E262
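
For context (not part of the diff), a hedged Python sketch of the kind of code a few of these ignored checks flag — W503 and E203 are the usual conflicts with black's output, max-line-length = 88 matches black's default, and codes such as E712, E741 and E262 are style checks this PR silences globally instead of fixing in place:

# Hypothetical snippet, only to illustrate the ignored flake8 codes above.
items = [1, 2, 3, 4]
total = (items[0]
         + items[1])             # W503: line break before a binary operator (black wraps this way)
tail = items[len(items) // 2 :]  # E203: whitespace before ':' (black's slice style for complex bounds)
check = items[0] == True         # E712: comparison to True with '==' (cf. the "boolean checks with is" commit)
l = len(items)                   # E741: ambiguous one-letter name ('l' reads like '1')
total += l  ## E262: inline comment not starting with '# ' (cf. "readded double comments")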
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
@@ -3,3 +3,7 @@ repos:
rev: 20.8b1
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4
hooks:
- id: flake8
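
Once the hook is in place, pre-commit runs flake8 automatically on staged files; a minimal sketch (assuming the `pre-commit` package is installed) of checking the whole tree in one go, e.g. from a CI script:

import subprocess

# Run the newly added flake8 hook over every tracked file, not just the
# files staged for the current commit.
result = subprocess.run(["pre-commit", "run", "flake8", "--all-files"], check=False)
# The hook exits non-zero when flake8 reports violations, so inspect the
# return code instead of letting check=True raise.
print("flake8 hook passed" if result.returncode == 0 else "flake8 hook reported issues")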
4 changes: 2 additions & 2 deletions scanpy/_utils.py
@@ -209,7 +209,7 @@ def get_igraph_from_adjacency(adjacency, directed=None):
g.add_edges(list(zip(sources, targets)))
try:
g.es['weight'] = weights
except:
except KeyError:
pass
if g.vcount() != adjacency.shape[0]:
logg.warning(
@@ -551,7 +551,7 @@ def warn_with_traceback(message, category, filename, lineno, file=None, line=Non
import traceback

traceback.print_stack()
log = file if hasattr(file, 'write') else sys.stderr
log = file if hasattr(file, 'write') else sys.stderr # noqa: F841
settings.write(warnings.formatwarning(message, category, filename, lineno, line))


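The two `_utils.py` edits above are recurring patterns when turning flake8 on: narrowing a bare `except:` (E722) and scoping an intentional unused assignment with a line-level `# noqa`. A standalone sketch, not the scanpy code itself:

import sys

def set_edge_weights(edge_attrs, weights):
    # Catch only the exception we expect instead of a bare `except:`,
    # which flake8 reports as E722; mirrors the `g.es['weight']` change above.
    try:
        edge_attrs['weight'] = weights
    except KeyError:
        pass

def pick_log_target(file=None):
    # F841 ("local variable is assigned to but never used") is silenced for
    # this single line rather than added to the project-wide ignore list.
    log = file if hasattr(file, 'write') else sys.stderr  # noqa: F841

set_edge_weights({}, [0.5])
pick_log_target()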
13 changes: 6 additions & 7 deletions scanpy/external/pl.py
@@ -332,15 +332,14 @@ def scrublet_score_distribution(
figsize: Optional[Tuple[float, float]] = (8, 3),
):
"""\
Plot histogram of doublet scores for observed transcriptomes and simulated doublets.
Plot histogram of doublet scores for observed transcriptomes and simulated doublets.

The histogram for simulated doublets is useful for determining the correct doublet score threshold.

The histogram for simulated doublets is useful for determining the correct doublet
score threshold.

Parameters
----------
adata
An annData object resulting from func:`~scanpy.external.scrublet`.
An annData object resulting from func:`~scanpy.external.scrublet`.
scale_hist_obs
Set y axis scale transformation in matplotlib for the plot of observed
transcriptomes (e.g. "linear", "log", "symlog", "logit")
@@ -353,9 +352,9 @@ def scrublet_score_distribution(
See also
--------
:func:`~scanpy.external.pp.scrublet`: Main way of running Scrublet, runs
preprocessing, doublet simulation (this function) and calling.
preprocessing, doublet simulation (this function) and calling.
:func:`~scanpy.external.pp.scrublet_simulate_doublets`: Run Scrublet's doublet
simulation separately for advanced usage.
simulation separately for advanced usage.
"""

threshold = adata.uns['scrublet']['threshold']
62 changes: 31 additions & 31 deletions scanpy/external/pp/_scrublet.py
@@ -1,5 +1,5 @@
from anndata import AnnData
from typing import Collection, Tuple, Optional, Union
from typing import Optional
import numpy as np

from ... import logging as logg
@@ -38,7 +38,7 @@ def scrublet(
and directly call functions of Scrublet(). You may also undertake your own
preprocessing, simulate doublets with
scanpy.external.pp.scrublet_simulate_doublets(), and run the core scrublet
function scanpy.external.pp.scrublet.scrublet().
function scanpy.external.pp.scrublet.scrublet().

.. note::
More information and bug reports `here
@@ -59,7 +59,7 @@ def scrublet(
as adata. This should have been built from adata_obs after
filtering genes and cells and selcting highly-variable genes.
sim_doublet_ratio
Number of doublets to simulate relative to the number of observed
Number of doublets to simulate relative to the number of observed
transcriptomes.
expected_doublet_rate
Where adata_sim not suplied, the estimated doublet rate for the
@@ -71,8 +71,8 @@ def scrublet(
synthetic doublets. If 1.0, each doublet is created by simply adding
the UMI counts from two randomly sampled observed transcriptomes. For
values less than 1, the UMI counts are added and then randomly sampled
at the specified rate.
knn_dist_metric
at the specified rate.
knn_dist_metric
Distance metric used when finding nearest neighbors. For list of
valid values, see the documentation for annoy (if `use_approx_neighbors`
is True) or sklearn.neighbors.NearestNeighbors (if `use_approx_neighbors`
@@ -88,16 +88,16 @@ def scrublet(
If True, center the data such that each gene has a mean of 0.
`sklearn.decomposition.PCA` will be used for dimensionality
reduction.
n_prin_comps
n_prin_comps
Number of principal components used to embed the transcriptomes prior
to k-nearest-neighbor graph construction.
to k-nearest-neighbor graph construction.
use_approx_neighbors
Use approximate nearest neighbor method (annoy) for the KNN
Use approximate nearest neighbor method (annoy) for the KNN
classifier.
get_doublet_neighbor_parents
If True, return (in .uns) the parent transcriptomes that generated the
doublet neighbors of each observed transcriptome. This information can
be used to infer the cell states that generated a given doublet state.
be used to infer the cell states that generated a given doublet state.
n_neighbors
Number of neighbors used to construct the KNN graph of observed
transcriptomes and simulated doublets. If ``None``, this is
@@ -131,7 +131,7 @@ def scrublet(
``adata.uns['scrublet']['doublet_scores_sim']``
Doublet scores for each simulated doublet transcriptome

``adata.uns['scrublet']['doublet_parents']``
``adata.uns['scrublet']['doublet_parents']``
Pairs of ``.obs_names`` used to generate each simulated doublet
transcriptome

@@ -141,9 +141,9 @@ def scrublet(
See also
--------
:func:`~scanpy.external.pp.scrublet_simulate_doublets`: Run Scrublet's doublet
simulation separately for advanced usage.
simulation separately for advanced usage.
:func:`~scanpy.external.pl.scrublet_score_distribution`: Plot histogram of doublet
scores for observed transcriptomes and simulated doublets.
scores for observed transcriptomes and simulated doublets.
"""
try:
import scrublet as sl
@@ -183,7 +183,7 @@ def scrublet(
pp.highly_variable_genes(adata_obs, subset=True)
else:
logged = pp.log1p(adata_obs, copy=True)
hvg = pp.highly_variable_genes(logged)
_ = pp.highly_variable_genes(logged)
adata_obs = adata_obs[:, logged.var['highly_variable']]

# Simulate the doublets based on the raw expressions from the normalised
@@ -255,7 +255,7 @@ def _scrublet_call_doublets(
transcriptomes and simulated doublets. This is a wrapper around the core
functions of `Scrublet <https://github.com/swolock/scrublet>`__ to allow
for flexibility in applying Scanpy filtering operations upstream. Unless
you know what you're doing you should use the main scrublet() function.
you know what you're doing you should use the main scrublet() function.

.. note::
More information and bug reports `here
@@ -291,20 +291,20 @@ def _scrublet_call_doublets(
reduction, unless `mean_center` is True.
n_prin_comps
Number of principal components used to embed the transcriptomes prior
to k-nearest-neighbor graph construction.
to k-nearest-neighbor graph construction.
use_approx_neighbors
Use approximate nearest neighbor method (annoy) for the KNN
Use approximate nearest neighbor method (annoy) for the KNN
classifier.
knn_dist_metric
Distance metric used when finding nearest neighbors. For list of
valid values, see the documentation for annoy (if `use_approx_neighbors`
is True) or sklearn.neighbors.NearestNeighbors (if `use_approx_neighbors`
is False).
get_doublet_neighbor_parents
If True, return the parent transcriptomes that generated the
doublet neighbors of each observed transcriptome. This information can
be used to infer the cell states that generated a given
doublet state.
If True, return the parent transcriptomes that generated the
doublet neighbors of each observed transcriptome. This information can
be used to infer the cell states that generated a given
doublet state.
threshold
Doublet score threshold for calling a transcriptome a doublet. If
`None`, this is set automatically by looking for the minimum between
@@ -314,7 +314,7 @@ def _scrublet_call_doublets(
predicted doublets in a 2-D embedding.
random_state
Initial state for doublet simulation and nearest neighbors.
verbose
verbose
If True, print progress updates.

Returns
@@ -331,7 +331,7 @@ def _scrublet_call_doublets(
``adata.uns['scrublet']['doublet_scores_sim']``
Doublet scores for each simulated doublet transcriptome

``adata.uns['scrublet']['doublet_parents']``
``adata.uns['scrublet']['doublet_parents']``
Pairs of ``.obs_names`` used to generate each simulated doublet transcriptome

``uns['scrublet']['parameters']``
@@ -444,16 +444,16 @@ def scrublet_simulate_doublets(
The annotated data matrix of shape ``n_obs`` × ``n_vars``. Rows
correspond to cells and columns to genes. Genes should have been
filtered for expression and variability, and the object should contain
raw expression of the same dimensions.
raw expression of the same dimensions.
layer
Layer of adata where raw values are stored, or 'X' if values are in .X.
Layer of adata where raw values are stored, or 'X' if values are in .X.
sim_doublet_ratio
Number of doublets to simulate relative to the number of observed
Number of doublets to simulate relative to the number of observed
transcriptomes. If `None`, self.sim_doublet_ratio is used.
synthetic_doublet_umi_subsampling
Rate for sampling UMIs when creating synthetic doublets. If 1.0,
each doublet is created by simply adding the UMIs from two randomly
sampled observed transcriptomes. For values less than 1, the
Rate for sampling UMIs when creating synthetic doublets. If 1.0,
each doublet is created by simply adding the UMIs from two randomly
sampled observed transcriptomes. For values less than 1, the
UMI counts are added and then randomly sampled at the specified
rate.

@@ -462,7 +462,7 @@ def scrublet_simulate_doublets(
adata : anndata.AnnData with simulated doublets in .X
if ``copy=True`` it returns or else adds fields to ``adata``:

``adata.uns['scrublet']['doublet_parents']``
``adata.uns['scrublet']['doublet_parents']``
Pairs of ``.obs_names`` used to generate each simulated doublet transcriptome

``uns['scrublet']['parameters']``
@@ -471,9 +471,9 @@ def scrublet_simulate_doublets(
See also
--------
:func:`~scanpy.external.pp.scrublet`: Main way of running Scrublet, runs
preprocessing, doublet simulation (this function) and calling.
preprocessing, doublet simulation (this function) and calling.
:func:`~scanpy.external.pl.scrublet_score_distribution`: Plot histogram of doublet
scores for observed transcriptomes and simulated doublets.
scores for observed transcriptomes and simulated doublets.
"""
try:
import scrublet as sl
16 changes: 8 additions & 8 deletions scanpy/external/pp/_scvi.py
@@ -33,12 +33,12 @@ def scvi(

Fits scVI model onto raw count data given an anndata object

scVI uses stochastic optimization and deep neural networks to aggregate information
scVI uses stochastic optimization and deep neural networks to aggregate information
across similar cells and genes and to approximate the distributions that underlie
observed expression values, while accounting for batch effects and limited sensitivity.

To use a linear-decoded Variational AutoEncoder model (implementation of [Svensson20]_.),
set linear_decoded = True. Compared to standard VAE, this model is less powerful, but can
set linear_decoded = True. Compared to standard VAE, this model is less powerful, but can
be used to inspect which genes contribute to variation in the dataset. It may also be used
for all scVI tasks, like differential expression, batch correction, imputation, etc.
However, batch correction may be less powerful as it assumes a linear model.
@@ -69,13 +69,13 @@ def scvi(
train_size
The train size, either a float between 0 and 1 or an integer for the number of training samples to use
batch_key
Column name in anndata.obs for batches.
Column name in anndata.obs for batches.
If None, no batch correction is performed
If not None, batch correction is performed per batch category
use_highly_variable_genes
If true, uses only the genes in anndata.var["highly_variable"]
subset_genes
Optional list of indices or gene names to subset anndata.
Optional list of indices or gene names to subset anndata.
If not None, use_highly_variable_genes is ignored
linear_decoder
If true, uses LDVAE model, which is an implementation of [Svensson20]_.
@@ -89,18 +89,18 @@ def scvi(
Extra arguments for UnsupervisedTrainer
model_kwargs
Extra arguments for VAE or LDVAE model

Returns
-------
If `copy` is true, anndata is returned.
If `return_posterior` is true, the posterior object is returned
If both `copy` and `return_posterior` are true,
a tuple of anndata and the posterior are returned in that order.
If both `copy` and `return_posterior` are true,
a tuple of anndata and the posterior are returned in that order.

`adata.obsm['X_scvi']` stores the latent representations
`adata.obsm['X_scvi_denoised']` stores the normalized mean of the negative binomial
`adata.obsm['X_scvi_sample_rate']` stores the mean of the negative binomial

If linear_decoder is true:
`adata.uns['ldvae_loadings']` stores the per-gene weights in the linear decoder as a
genes by n_latent matrix.
2 changes: 1 addition & 1 deletion scanpy/external/tl/_trimap.py
@@ -76,7 +76,7 @@ def trimap(

Example
-------

>>> import scanpy as sc
>>> import scanpy.external as sce
>>> pbmc = sc.datasets.pbmc68k_reduced()
4 changes: 2 additions & 2 deletions scanpy/get/get.py
@@ -96,7 +96,7 @@ def rank_genes_groups_df(
def _check_indices(
dim_df: pd.DataFrame,
alt_index: pd.Index,
dim: "Literal['obs', 'var']",
dim: "Literal['obs', 'var']", # noqa: F821
keys: List[str],
alias_index: Optional[pd.Index] = None,
use_raw: bool = False,
@@ -176,7 +176,7 @@ def _get_array_values(
X,
dim_names: pd.Index,
keys: List[str],
axis: "Literal[0, 1]",
axis: "Literal[0, 1]", # noqa: F821
backed: bool,
):
# TODO: This should be made easier on the anndata side
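The `# noqa: F821` comments in `get.py` deal with pyflakes inspecting names inside string (forward-reference) annotations; a hypothetical illustration of the same situation:

# `Literal` is referenced only inside a quoted annotation and is not imported
# here, so flake8/pyflakes would report F821 ("undefined name") without the
# line-level noqa. The annotation is never evaluated at runtime, so the
# function itself still works.
def pick_axis(axis: "Literal[0, 1]") -> int:  # noqa: F821
    return int(axis)

print(pick_axis(1))  # -> 1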
6 changes: 2 additions & 4 deletions scanpy/neighbors/__init__.py
@@ -15,13 +15,11 @@
from ..tools._utils import _choose_representation, doc_use_rep, doc_n_pcs
from .. import settings


N_DCS = 15 # default number of diffusion components
N_PCS = (
settings.N_PCS
) # Backwards compat, constants should be defined in only one place.


_Method = Literal['umap', 'gauss', 'rapids']
_MetricFn = Callable[[np.ndarray, np.ndarray], float]
# from sklearn.metrics.pairwise_distances.__doc__:
@@ -126,7 +124,7 @@ def neighbors(
**distances** : sparse matrix of dtype `float32`.
Instead of decaying weights, this stores distances for each pair of
neighbors.

Notes
-----
If `method='umap'`, it's highly recommended to install pynndescent ``pip install pynndescent``.
@@ -799,7 +797,7 @@ def compute_neighbors(
try:
if forest:
self._rp_forest = _make_forest_dict(forest)
except:
except Exception:
pass
# write indices as attributes
if write_knn_indices:
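The `compute_neighbors` hunk replaces a bare `except:` with `except Exception:`; a small sketch of why the distinction matters even when the intent is simply to ignore failures:

def safe_forest_dict(forest):
    # `except Exception:` still swallows ordinary errors, but unlike a bare
    # `except:` (flake8 E722) it does not trap KeyboardInterrupt or SystemExit,
    # which derive from BaseException rather than Exception.
    try:
        return dict(forest)
    except Exception:
        return None

print(safe_forest_dict([("a", 1)]))  # {'a': 1}
print(safe_forest_dict(42))          # None (TypeError swallowed)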
2 changes: 1 addition & 1 deletion scanpy/plotting/__init__.py
@@ -77,7 +77,7 @@
Classes
-------

These classes allow fine tuning of visual parameters.
These classes allow fine tuning of visual parameters.

.. autosummary::
:toctree: .
6 changes: 3 additions & 3 deletions scanpy/plotting/_anndata.py
@@ -552,7 +552,7 @@ def ranking(
n_rows, n_cols = 1, n_panels
else:
n_rows, n_cols = 2, int(n_panels / 2 + 0.5)
fig = pl.figure(
_ = pl.figure(
figsize=(
n_cols * rcParams['figure.figsize'][0],
n_rows * rcParams['figure.figsize'][1],
@@ -1474,7 +1474,7 @@ def tracksplot(
ymin, ymax = ax.get_ylim()
ymax = int(ymax)
ax.set_yticks([ymax])
tt = ax.set_yticklabels([str(ymax)], ha='left', va='top')
ax.set_yticklabels([str(ymax)], ha='left', va='top')
ax.spines['right'].set_position(('axes', 1.01))
ax.tick_params(
axis='y',
@@ -1960,7 +1960,7 @@ def _plot_gene_groups_brackets(
va='bottom',
rotation=rotation,
)
except:
except Exception:
pass
else:
top = left
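Several plotting hunks above drop unused assignments — `fig = pl.figure(...)` becomes `_ = pl.figure(...)` and the `tt =` binding is removed — which is how this PR satisfies flake8's unused-variable check (F841) without a noqa. A rough sketch of the convention (assumes matplotlib is installed):

import matplotlib
matplotlib.use("Agg")  # headless backend so the sketch runs anywhere
import matplotlib.pyplot as plt

def sketch_panel():
    # Binding the figure to `_` (or not binding `set_yticklabels`'s return
    # value at all) marks the result as deliberately discarded.
    _ = plt.figure(figsize=(4, 3))
    ax = plt.gca()
    ax.set_yticks([1])
    ax.set_yticklabels(["1"], ha='left', va='top')
    plt.close('all')

sketch_panel()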