scverse · ivirshup · Mar 18, 2021 · Feb 24, 2021 · Feb 24, 2021 · Feb 24, 2021
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,37 @@
+# Can't yet be moved to the pyproject.toml due to https://gitlab.com/pycqa/flake8/-/issues/428#note_251982786
+[flake8]
+max-line-length = 88
+ignore = # module imported but unused -> required for Scanpy's API
+         F401,
+         # line break before a binary operator -> black does not adhere to PEP8
+         W503,
+         # line break occurred after a binary operator -> black does not adhere to PEP8
+         W504,
+         # line too long -> we accept long comment lines; black gets rid of long code lines
+         E501,
+         # whitespace before : -> black does not adhere to PEP8
+         E203,
+         # missing whitespace after ',', ';', or ':' -> black does not adhere to PEP8
+         E231,
+         # module level import not at top of file -> required to circumvent circular imports for Scanpy's API
+         E402,
+         # continuation line over-indented for hanging indent -> black does not adhere to PEP8
+         E126,
+         # inline comment should start with '# ' -> Scanpy allows them for specific explanations
+         E262,
+         # too many leading '#' for block comment -> Scanpy allows them to divide comments into sections
+         E266,
+         # Do not assign a lambda expression, use a def -> Scanpy allows lambda expression assignments
+         E731,
+         # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation
+         E741
+per-file-ignores =
+    # F811 Redefinition of unused name from line, does not play nice with pytest fixtures
+    tests/test*.py: F811
+exclude =
+    .git,
+    __pycache__,
+    build,
+    docs/_build,
+    dist,
+
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -3,3 +3,12 @@ repos:
     rev: 20.8b1
     hooks:
     -   id: black
+-   repo: https://gitlab.com/pycqa/flake8
+    rev: 3.8.4
+    hooks:
+    -   id: flake8
+-   repo: https://github.com/pre-commit/mirrors-autopep8
+    rev: v1.5.5
+    hooks:
+    -   id: autopep8
+        args: ["-i"]
diff --git a/docs/dev/code.rst b/docs/dev/code.rst
@@ -17,6 +17,11 @@ Code style
 
 New code should follow
 `Black <https://black.readthedocs.io/en/stable/the_black_code_style.html>`__
-and Scanpy’s
+and
+`flake8 <https://flake8.pycqa.org>`__.
+We ignore a couple of flake8 checks which are documented in the .flake8 file in the root of this repository.
+To learn how to ignore checks per line please read
+`flake8 violations <https://flake8.pycqa.org/en/latest/user/violations.html>`__.
+Additionally, we use Scanpy’s
 `EditorConfig <https://github.com/theislab/scanpy/blob/master/.editorconfig>`__,
 so using an editor/IDE with support for both is helpful.
diff --git a/scanpy/_utils.py b/scanpy/_utils.py
@@ -212,7 +212,7 @@ def get_igraph_from_adjacency(adjacency, directed=None):
     g.add_edges(list(zip(sources, targets)))
     try:
         g.es['weight'] = weights
-    except:
+    except KeyError:
         pass
     if g.vcount() != adjacency.shape[0]:
         logg.warning(
@@ -554,7 +554,9 @@ def warn_with_traceback(message, category, filename, lineno, file=None, line=Non
     import traceback
 
     traceback.print_stack()
-    log = file if hasattr(file, 'write') else sys.stderr
+    log = (  # noqa: F841  # TODO Does this need fixing?
+        file if hasattr(file, 'write') else sys.stderr
+    )
     settings.write(warnings.formatwarning(message, category, filename, lineno, line))
 
 

diff --git a/scanpy/external/pl.py b/scanpy/external/pl.py
@@ -332,15 +332,15 @@ def scrublet_score_distribution(
     figsize: Optional[Tuple[float, float]] = (8, 3),
 ):
     """\
-    Plot histogram of doublet scores for observed transcriptomes and simulated doublets. 
+    Plot histogram of doublet scores for observed transcriptomes and simulated doublets.
+
+    The histogram for simulated doublets is useful for determining the correct doublet
+    score threshold.
 
-    The histogram for simulated doublets is useful for determining the correct doublet 
-    score threshold. 
-
     Parameters
     ----------
     adata
-        An annData object resulting from func:`~scanpy.external.scrublet`.  
+        An AnnData object resulting from :func:`~scanpy.external.pp.scrublet`.
     scale_hist_obs
         Set y axis scale transformation in matplotlib for the plot of observed
         transcriptomes (e.g. "linear", "log", "symlog", "logit")
@@ -353,9 +353,9 @@ def scrublet_score_distribution(
     See also
     --------
     :func:`~scanpy.external.pp.scrublet`: Main way of running Scrublet, runs
-        preprocessing, doublet simulation (this function) and calling. 
+        preprocessing, doublet simulation (this function) and calling.
     :func:`~scanpy.external.pp.scrublet_simulate_doublets`: Run Scrublet's doublet
-        simulation separately for advanced usage. 
+        simulation separately for advanced usage.
     """
 
     threshold = adata.uns['scrublet']['threshold']

diff --git a/scanpy/external/pp/_scrublet.py b/scanpy/external/pp/_scrublet.py
@@ -1,5 +1,5 @@
 from anndata import AnnData
-from typing import Collection, Tuple, Optional, Union
+from typing import Optional
 import numpy as np
 from scipy import sparse
 
@@ -40,7 +40,7 @@ def scrublet(
     and directly call functions of Scrublet(). You may also undertake your own
     preprocessing, simulate doublets with
     scanpy.external.pp.scrublet_simulate_doublets(), and run the core scrublet
-    function scanpy.external.pp.scrublet.scrublet(). 
+    function scanpy.external.pp.scrublet.scrublet().
 
     .. note::
         More information and bug reports `here
@@ -61,7 +61,7 @@ def scrublet(
         as adata. This should have been built from adata_obs after
         filtering genes and cells and selecting highly-variable genes.
     sim_doublet_ratio
-        Number of doublets to simulate relative to the number of observed 
+        Number of doublets to simulate relative to the number of observed
         transcriptomes.
     expected_doublet_rate
         Where adata_sim is not supplied, the estimated doublet rate for the
@@ -73,8 +73,8 @@ def scrublet(
         synthetic doublets. If 1.0, each doublet is created by simply adding
         the UMI counts from two randomly sampled observed transcriptomes. For
         values less than 1, the UMI counts are added and then randomly sampled
-        at the specified rate. 
-    knn_dist_metric 
+        at the specified rate.
+    knn_dist_metric
         Distance metric used when finding nearest neighbors. For list of
         valid values, see the documentation for annoy (if `use_approx_neighbors`
         is True) or sklearn.neighbors.NearestNeighbors (if `use_approx_neighbors`
@@ -90,16 +90,16 @@ def scrublet(
         If True, center the data such that each gene has a mean of 0.
         `sklearn.decomposition.PCA` will be used for dimensionality
         reduction.
-    n_prin_comps 
+    n_prin_comps
         Number of principal components used to embed the transcriptomes prior
-        to k-nearest-neighbor graph construction. 
+        to k-nearest-neighbor graph construction.
     use_approx_neighbors
-        Use approximate nearest neighbor method (annoy) for the KNN 
+        Use approximate nearest neighbor method (annoy) for the KNN
         classifier.
     get_doublet_neighbor_parents
         If True, return (in .uns) the parent transcriptomes that generated the
         doublet neighbors of each observed transcriptome. This information can
-        be used to infer the cell states that generated a given doublet state. 
+        be used to infer the cell states that generated a given doublet state.
     n_neighbors
         Number of neighbors used to construct the KNN graph of observed
         transcriptomes and simulated doublets. If ``None``, this is
@@ -133,7 +133,7 @@ def scrublet(
         ``adata.uns['scrublet']['doublet_scores_sim']``
             Doublet scores for each simulated doublet transcriptome
 
-        ``adata.uns['scrublet']['doublet_parents']`` 
+        ``adata.uns['scrublet']['doublet_parents']``
             Pairs of ``.obs_names`` used to generate each simulated doublet
             transcriptome
 
@@ -143,9 +143,9 @@ def scrublet(
     See also
     --------
     :func:`~scanpy.external.pp.scrublet_simulate_doublets`: Run Scrublet's doublet
-        simulation separately for advanced usage. 
+        simulation separately for advanced usage.
     :func:`~scanpy.external.pl.scrublet_score_distribution`: Plot histogram of doublet
-        scores for observed transcriptomes and simulated doublets. 
+        scores for observed transcriptomes and simulated doublets.
     """
     try:
         import scrublet as sl
@@ -185,7 +185,7 @@ def scrublet(
             pp.highly_variable_genes(adata_obs, subset=True)
         else:
             logged = pp.log1p(adata_obs, copy=True)
-            hvg = pp.highly_variable_genes(logged)
+            _ = pp.highly_variable_genes(logged)
             adata_obs = adata_obs[:, logged.var['highly_variable']]
 
         # Simulate the doublets based on the raw expressions from the normalised
@@ -257,7 +257,7 @@ def _scrublet_call_doublets(
     transcriptomes and simulated doublets. This is a wrapper around the core
     functions of `Scrublet <https://github.com/swolock/scrublet>`__ to allow
     for flexibility in applying Scanpy filtering operations upstream. Unless
-    you know what you're doing you should use the main scrublet() function.    
+    you know what you're doing you should use the main scrublet() function.
 
     .. note::
         More information and bug reports `here
@@ -293,20 +293,20 @@ def _scrublet_call_doublets(
         reduction, unless `mean_center` is True.
     n_prin_comps
         Number of principal components used to embed the transcriptomes prior
-        to k-nearest-neighbor graph construction. 
+        to k-nearest-neighbor graph construction.
     use_approx_neighbors
-        Use approximate nearest neighbor method (annoy) for the KNN 
+        Use approximate nearest neighbor method (annoy) for the KNN
         classifier.
     knn_dist_metric
         Distance metric used when finding nearest neighbors. For list of
         valid values, see the documentation for annoy (if `use_approx_neighbors`
         is True) or sklearn.neighbors.NearestNeighbors (if `use_approx_neighbors`
         is False).
     get_doublet_neighbor_parents
-        If True, return the parent transcriptomes that generated the 
-        doublet neighbors of each observed transcriptome. This information can 
-        be used to infer the cell states that generated a given 
-        doublet state. 
+        If True, return the parent transcriptomes that generated the
+        doublet neighbors of each observed transcriptome. This information can
+        be used to infer the cell states that generated a given
+        doublet state.
     threshold
         Doublet score threshold for calling a transcriptome a doublet. If
         `None`, this is set automatically by looking for the minimum between
@@ -316,7 +316,7 @@ def _scrublet_call_doublets(
         predicted doublets in a 2-D embedding.
     random_state
         Initial state for doublet simulation and nearest neighbors.
-    verbose 
+    verbose
         If True, print progress updates.
 
     Returns
@@ -333,7 +333,7 @@ def _scrublet_call_doublets(
         ``adata.uns['scrublet']['doublet_scores_sim']``
             Doublet scores for each simulated doublet transcriptome
 
-        ``adata.uns['scrublet']['doublet_parents']`` 
+        ``adata.uns['scrublet']['doublet_parents']``
             Pairs of ``.obs_names`` used to generate each simulated doublet transcriptome
 
         ``uns['scrublet']['parameters']``
@@ -453,16 +453,16 @@ def scrublet_simulate_doublets(
         The annotated data matrix of shape ``n_obs`` × ``n_vars``. Rows
         correspond to cells and columns to genes. Genes should have been
         filtered for expression and variability, and the object should contain
-        raw expression of the same dimensions. 
+        raw expression of the same dimensions.
     layer
-        Layer of adata where raw values are stored, or 'X' if values are in .X. 
+        Layer of adata where raw values are stored, or 'X' if values are in .X.
     sim_doublet_ratio
-        Number of doublets to simulate relative to the number of observed 
+        Number of doublets to simulate relative to the number of observed
         transcriptomes. If `None`, self.sim_doublet_ratio is used.
     synthetic_doublet_umi_subsampling
-        Rate for sampling UMIs when creating synthetic doublets. If 1.0, 
-        each doublet is created by simply adding the UMIs from two randomly 
-        sampled observed transcriptomes. For values less than 1, the 
+        Rate for sampling UMIs when creating synthetic doublets. If 1.0,
+        each doublet is created by simply adding the UMIs from two randomly
+        sampled observed transcriptomes. For values less than 1, the
         UMI counts are added and then randomly sampled at the specified
         rate.
 
@@ -471,7 +471,7 @@ def scrublet_simulate_doublets(
     adata : anndata.AnnData with simulated doublets in .X
         if ``copy=True`` it returns or else adds fields to ``adata``:
 
-        ``adata.uns['scrublet']['doublet_parents']`` 
+        ``adata.uns['scrublet']['doublet_parents']``
             Pairs of ``.obs_names`` used to generate each simulated doublet transcriptome
 
         ``uns['scrublet']['parameters']``
@@ -480,9 +480,9 @@ def scrublet_simulate_doublets(
     See also
     --------
     :func:`~scanpy.external.pp.scrublet`: Main way of running Scrublet, runs
-        preprocessing, doublet simulation (this function) and calling. 
+        preprocessing, doublet simulation (this function) and calling.
     :func:`~scanpy.external.pl.scrublet_score_distribution`: Plot histogram of doublet
-        scores for observed transcriptomes and simulated doublets. 
+        scores for observed transcriptomes and simulated doublets.
     """
     try:
         import scrublet as sl

diff --git a/scanpy/external/tl/_trimap.py b/scanpy/external/tl/_trimap.py
@@ -76,7 +76,7 @@ def trimap(
 
     Example
     -------
-    
+
     >>> import scanpy as sc
     >>> import scanpy.external as sce
     >>> pbmc = sc.datasets.pbmc68k_reduced()

diff --git a/scanpy/get/get.py b/scanpy/get/get.py
@@ -6,7 +6,6 @@
 from scipy.sparse import spmatrix
 
 from anndata import AnnData
-import warnings
 
 # --------------------------------------------------------------------------------
 # Plotting data helpers
@@ -96,7 +95,7 @@ def rank_genes_groups_df(
 def _check_indices(
     dim_df: pd.DataFrame,
     alt_index: pd.Index,
-    dim: "Literal['obs', 'var']",
+    dim: "Literal['obs', 'var']",  # noqa: F821  # TODO Does this need fixing?
     keys: List[str],
     alias_index: Optional[pd.Index] = None,
     use_raw: bool = False,
@@ -176,7 +175,7 @@ def _get_array_values(
     X,
     dim_names: pd.Index,
     keys: List[str],
-    axis: "Literal[0, 1]",
+    axis: "Literal[0, 1]",  # noqa: F821  # TODO Does this need fixing?
     backed: bool,
 ):
     # TODO: This should be made easier on the anndata side
-Original file line number
+Diff line change
@@ Expand Up / @@ -76,7 +76,7 @@ def trimap( @@
         Example
         -------
         >>> import scanpy as sc
         >>> import scanpy.external as sce
         >>> pbmc = sc.datasets.pbmc68k_reduced()
@@ Expand Down @@