diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 00000000..5d588928
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,36 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: bug
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Standalone code to reproduce the behavior:
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Actual behavior**
+Include full traceback if applicable.
+
+**scprep version**
+Output of `scprep.__version__`
+
+**Output of `pd.show_versions()`**
+
+<details>
+
+```
+paste pd.show_versions() here
+```
+
+</details>
+ +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..4a05b865 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or code snippets related to the feature request here. diff --git a/.travis.yml b/.travis.yml index 87946602..96ed5e04 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,10 @@ - gfortran - libblas-dev - liblapack-dev + - libglu1-mesa-dev + - freeglut3-dev + - mesa-common-dev + - libgsl-dev cache: - pip @@ -25,7 +29,7 @@ - $HOME/R/Library install: - - python setup.py install + - pip install -U . before_script: - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..10ef1ee7 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. 
+ +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at krishnaswamylab@gmail.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..4341d1ad --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,32 @@ + +Contributing to scprep +============================ + +There are many ways to contribute to `scprep`, with the most common ones +being contribution of code or documentation to the project. Improving the +documentation is no less important than improving the library itself. If you +find a typo in the documentation, or have made improvements, do not hesitate to +submit a GitHub pull request. + +But there are many other ways to help. In particular answering queries on the +[issue tracker](https://github.com/KrishnaswamyLab/scprep/issues), +investigating bugs, and [reviewing other developers' pull +requests](https://github.com/KrishnaswamyLab/scprep/pulls) +are very valuable contributions that decrease the burden on the project +maintainers. + +Another way to contribute is to report issues you're facing, and give a "thumbs +up" on issues that others reported and that are relevant to you. It also helps +us if you spread the word: reference the project from your blog and articles, +link to it from your website, or simply star it in GitHub to say "I use it". + +Code of Conduct +--------------- + +We abide by the principles of openness, respect, and consideration of others +of the Python Software Foundation: https://www.python.org/psf/codeofconduct/. + +Attribution +--------------- + +This `CONTRIBUTING.md` was adapted from [scikit-learn](https://github.com/scikit-learn/scikit-learn/blob/master/CONTRIBUTING.md). \ No newline at end of file diff --git a/README.rst b/README.rst index dede2bcb..57dd93ac 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,5 @@ -============= -scprep -============= +.. image:: logo.png + :alt: scprep logo .. 
image:: https://img.shields.io/pypi/v/scprep.svg :target: https://pypi.org/project/scprep/ @@ -24,8 +23,14 @@ scprep :target: https://github.com/KrishnaswamyLab/scprep/ :alt: GitHub stars +`scprep` provides an all-in-one framework for loading, preprocessing, and plotting matrices in Python, with a focus on single-cell genomics. -Tools for loading and preprocessing biological matrices in Python. +The philosophy of `scprep`: + +* Data shouldn't be hidden in a complex and bespoke class object. `scprep` works with `numpy` arrays, `pandas` data frames, and `scipy` sparse matrices, all of which are popular data formats in Python and accepted as input to most common algorithms. +* Your analysis pipeline shouldn't have to change based on data format. Changing from a `numpy` array to a `pandas` data frame introduces endless technical differences (e.g. in indexing matrices). `scprep` provides data-agnostic methods that work the same way on all formats. +* Simple analysis should mean simple code. `scprep` takes care of annoying edge cases and sets nice defaults so you don't have to. +* Using a framework shouldn't be limiting. Because nothing is hidden from you, you have access to the power of `numpy`, `scipy`, `pandas` and `matplotlib` just as you would if you used them directly. Installation ------------ @@ -72,4 +77,4 @@ Examples Help ---- -If you have any questions or require assistance using scprep, please read the documentation at https://scprep.readthedocs.io/ or contact us at https://krishnaswamylab.org/get-help \ No newline at end of file +If you have any questions or require assistance using scprep, please read the documentation at https://scprep.readthedocs.io/ or contact us at https://krishnaswamylab.org/get-help diff --git a/doc/source/index.rst b/doc/source/index.rst index a19d46f1..bf1c246d 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -1,6 +1,6 @@ -=========================================================================== -scprep -=========================================================================== +.. raw:: html + + scprep logo
.. raw:: html @@ -26,7 +26,14 @@ scprep GitHub stars -Tools for building and manipulating graphs in Python. +`scprep` provides an all-in-one framework for loading, preprocessing, and plotting matrices in Python, with a focus on single-cell genomics. + +The philosophy of `scprep`: + +* Data shouldn't be hidden in a complex and bespoke class object. `scprep` works with `numpy` arrays, `pandas` data frames, and `scipy` sparse matrices, all of which are popular data formats in Python and accepted as input to most common algorithms. +* Your analysis pipeline shouldn't have to change based on data format. Changing from a `numpy` array to a `pandas` data frame introduces endless technical differences (e.g. in indexing matrices). `scprep` provides data-agnostic methods that work the same way on all formats. +* Simple analysis should mean simple code. `scprep` takes care of annoying edge cases and sets nice defaults so you don't have to. +* Using a framework shouldn't be limiting. Because nothing is hidden from you, you have access to the power of `numpy`, `scipy`, `pandas` and `matplotlib` just as you would if you used them directly. .. toctree:: :maxdepth: 2 @@ -63,4 +70,4 @@ You can use `scprep` with your single cell data as follows:: Help ==== -If you have any questions or require assistance using scprep, please contact us at https://krishnaswamylab.org/get-help \ No newline at end of file +If you have any questions or require assistance using scprep, please contact us at https://krishnaswamylab.org/get-help diff --git a/doc/source/reference.rst b/doc/source/reference.rst index 90e274a8..8122ae6d 100644 --- a/doc/source/reference.rst +++ b/doc/source/reference.rst @@ -11,6 +11,24 @@ Data Input/Output :imported-members: :show-inheritance: +HDF5 +~~~~ + +.. automodule:: scprep.io.hdf5 + :autosummary: + :members: + :inherited-members: + :show-inheritance: + +Download +~~~~~~~~ + +.. automodule:: scprep.io.download + :autosummary: + :members: + :inherited-members: + :show-inheritance: + Filtering --------- @@ -103,10 +121,19 @@ External Tools :imported-members: :show-inheritance: -HDF5 ----- +Splatter +~~~~~~~~ -.. automodule:: scprep.io.hdf5 +.. automodule:: scprep.run.splatter + :autosummary: + :members: + :inherited-members: + :show-inheritance: + +Slingshot +~~~~~~~~~ + +.. 
automodule:: scprep.run.slingshot :autosummary: :members: :inherited-members: diff --git a/doc/source/requirements.txt b/doc/source/requirements.txt index 58906e83..ae852b76 100644 --- a/doc/source/requirements.txt +++ b/doc/source/requirements.txt @@ -1,9 +1,9 @@ -numpy>=1.10.0 -scipy>=0.18.0 +numpy>=1.12.0 +scipy>=0.18.1 scikit-learn>=0.19.1 -pandas>=0.19.0,<0.24 -decorator -matplotlib +pandas>=0.25 +decorator>=4.3.0 +matplotlib>=3.0 sphinx<=1.8.5 sphinxcontrib-napoleon autodocsumm diff --git a/logo.png b/logo.png new file mode 100644 index 00000000..0bb30d36 Binary files /dev/null and b/logo.png differ diff --git a/requirements.txt b/requirements.txt index b5819b87..31d927e5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -numpy>=1.10.0 -scipy>=0.18.0 +numpy>=1.12.0 +scipy>=0.18.1 scikit-learn>=0.19.1 -pandas>=0.19.0,<0.24 +pandas>=0.25 decorator>=4.3.0 diff --git a/scprep/__init__.py b/scprep/__init__.py index 2bbd9f2f..a7a8ffdd 100644 --- a/scprep/__init__.py +++ b/scprep/__init__.py @@ -15,11 +15,13 @@ import scprep.reduce import scprep.run -import pandas as _pd -if int(_pd.__version__.split(".")[1]) < 24: - import numpy as _np - - def __rmatmul__(self, other): - """ Matrix multiplication using binary `@` operator in Python>=3.5 """ - return self.dot(_np.transpose(other)) - _pd.core.series.Series.__rmatmul__ = __rmatmul__ +import pandas as pd +if int(pd.__version__.split('.')[1]) < 26: + def fill_value(self): + # Used in reindex_indexer + try: + return self.values.dtype.fill_value + except AttributeError: + return self.values.dtype.na_value + from pandas.core.internals.blocks import ExtensionBlock + setattr(ExtensionBlock, 'fill_value', property(fill_value)) diff --git a/scprep/_lazyload.py b/scprep/_lazyload.py index 130a88b9..b9357058 100644 --- a/scprep/_lazyload.py +++ b/scprep/_lazyload.py @@ -14,7 +14,8 @@ 'rinterface', {'rinterface_lib': ['callbacks']}], 'h5py': [], - 'tables': [] + 'tables': [], + 'statsmodels': [{'nonparametric': ['smoothers_lowess']}], } diff --git a/scprep/filter.py b/scprep/filter.py index 1737cb14..e816369d 100644 --- a/scprep/filter.py +++ b/scprep/filter.py @@ -28,14 +28,14 @@ def remove_empty_cells(data, *extra_data, sample_labels=None): warnings.warn("`scprep.filter.remove_empty_cells` is deprecated. " "Use `scprep.filter.filter_empty_cells` instead.", DeprecationWarning) - return filter_empty_cells(data, *extra_data) + return filter_empty_cells(data, *extra_data, sample_labels=sample_labels) def remove_duplicates(data, *extra_data, sample_labels=None): warnings.warn("`scprep.filter.remove_duplicates` is deprecated. " "Use `scprep.filter.filter_duplicates` instead.", DeprecationWarning) - return filter_duplicates(data, *extra_data) + return filter_duplicates(data, *extra_data, sample_labels=sample_labels) def filter_empty_genes(data, *extra_data): @@ -120,21 +120,6 @@ def filter_empty_cells(data, *extra_data, sample_labels=None): return data -def _get_filter_idx(data, values, - cutoff, percentile, - keep_cells): - cutoff = measure._get_percentile_cutoff( - values, cutoff, percentile, required=True) - if keep_cells == 'above': - keep_cells_idx = values > cutoff - elif keep_cells == 'below': - keep_cells_idx = values < cutoff - else: - raise ValueError("Expected `keep_cells` in ['above', 'below']. 
" - "Got {}".format(keep_cells)) - return keep_cells_idx - - def filter_values(data, *extra_data, values=None, cutoff=None, percentile=None, keep_cells='above', @@ -188,9 +173,9 @@ def filter_values(data, *extra_data, values=None, "Filtering as a single sample.", DeprecationWarning) assert values is not None - keep_cells_idx = _get_filter_idx(data, values, - cutoff, percentile, - keep_cells) + keep_cells_idx = utils._get_filter_idx(values, + cutoff, percentile, + keep_cells) if return_values: extra_data = [values] + list(extra_data) data = select.select_rows(data, *extra_data, idx=keep_cells_idx) @@ -303,7 +288,7 @@ def filter_gene_set_expression(data, *extra_data, genes=None, Filtered extra data, if passed. """ cell_sums = measure.gene_set_expression( - data, genes, + data, genes=genes, starts_with=starts_with, ends_with=ends_with, exact_word=exact_word, regex=regex, library_size_normalize=library_size_normalize) @@ -330,6 +315,8 @@ def _find_unique_cells(data): """ if isinstance(data, pd.SparseDataFrame): unique_idx = _find_unique_cells(data.to_coo()) + elif utils.is_sparse_dataframe(data): + unique_idx = _find_unique_cells(data.sparse.to_coo()) elif isinstance(data, pd.DataFrame): unique_idx = ~data.duplicated() elif isinstance(data, np.ndarray): diff --git a/scprep/io/__init__.py b/scprep/io/__init__.py index 18446deb..d34bd92c 100644 --- a/scprep/io/__init__.py +++ b/scprep/io/__init__.py @@ -4,4 +4,6 @@ from .csv import load_csv, load_tsv from .tenx import load_10X, load_10X_zip, load_10X_HDF5 from .fcs import load_fcs -from .mtx import load_mtx +from .mtx import load_mtx, save_mtx + +from . import download, hdf5 diff --git a/scprep/io/csv.py b/scprep/io/csv.py index 6e65c51b..bf17bc55 100644 --- a/scprep/io/csv.py +++ b/scprep/io/csv.py @@ -4,13 +4,14 @@ import pandas as pd from .utils import _matrix_to_data_frame +from .. import utils def _read_csv_sparse(filename, chunksize=1000000, fill_value=0.0, **kwargs): - """Read a csv file into a pandas.SparseDataFrame + """Read a csv file into a pd.DataFrame[pd.SparseArray] """ chunks = pd.read_csv(filename, chunksize=chunksize, **kwargs) - data = pd.concat(chunk.to_sparse(fill_value=fill_value) + data = pd.concat(utils.dataframe_to_sparse(chunk, fill_value=fill_value) for chunk in chunks) return data @@ -36,7 +37,7 @@ def load_csv(filename, cell_axis='row', delimiter=',', If `True`, we assume cell names are in the first row/column. Otherwise expects a filename or an array containing a list of cell barcodes. sparse : bool, optional (default: False) - If True, loads the data as a pd.SparseDataFrame. This uses less memory + If True, loads the data as a pd.DataFrame[pd.SparseArray]. This uses less memory but more CPU. **kwargs : optional arguments for `pd.read_csv`. @@ -44,7 +45,7 @@ def load_csv(filename, cell_axis='row', delimiter=',', ------- data : array-like, shape=[n_samples, n_features] If either gene or cell names are given, data will be a pd.DataFrame or - pd.SparseDataFrame. If no names are given, data will be a np.ndarray + pd.DataFrame[pd.SparseArray]. If no names are given, data will be a np.ndarray or scipy.sparse.spmatrix """ if cell_axis not in ['row', 'column', 'col']: @@ -113,7 +114,7 @@ def load_tsv(filename, cell_axis='row', delimiter='\t', If `True`, we assume cell names are in the first row/column. Otherwise expects a filename or an array containing a list of cell barcodes. sparse : bool, optional (default: False) - If True, loads the data as a pd.SparseDataFrame. 
This uses less memory + If True, loads the data as a pd.DataFrame[pd.SparseArray]. This uses less memory but more CPU. **kwargs : optional arguments for `pd.read_csv`. @@ -121,7 +122,7 @@ def load_tsv(filename, cell_axis='row', delimiter='\t', ------- data : array-like, shape=[n_samples, n_features] If either gene or cell names are given, data will be a pd.DataFrame or - pd.SparseDataFrame. If no names are given, data will be a np.ndarray + pd.DataFrame[pd.SparseArray]. If no names are given, data will be a np.ndarray or scipy.sparse.spmatrix """ return load_csv(filename, cell_axis=cell_axis, delimiter=delimiter, diff --git a/scprep/io/download.py b/scprep/io/download.py new file mode 100644 index 00000000..5072c31c --- /dev/null +++ b/scprep/io/download.py @@ -0,0 +1,123 @@ +import requests +import zipfile +import tempfile +import os +import urllib.request + +_CHUNK_SIZE = 32768 +_GOOGLE_DRIVE_URL = "https://docs.google.com/uc?export=download" +_FAKE_HEADERS = [('User-Agent', 'Mozilla/5.0')] + + +def _save_response_content(response, destination): + global _CHUNK_SIZE + if isinstance(destination, str): + with open(destination, 'wb') as handle: + _save_response_content(response, handle) + else: + for chunk in response.iter_content(_CHUNK_SIZE): + if chunk: # filter out keep-alive new chunks + destination.write(chunk) + + +def _google_drive_confirm_token(response): + for key, value in response.cookies.items(): + if key.startswith('download_warning'): + return value + return None + + +def _GET_google_drive(id): + """Post a GET request to Google Drive""" + global _GOOGLE_DRIVE_URL + + with requests.Session() as session: + response = session.get(_GOOGLE_DRIVE_URL, params = { 'id' : id }, stream = True) + token = _google_drive_confirm_token(response) + + if token: + params = { 'id' : id, 'confirm' : token } + response = session.get(_GOOGLE_DRIVE_URL, params = params, stream = True) + return response + + +def download_google_drive(id, destination): + """Download a file from Google Drive + + Requires the file to be available to view by anyone with the URL. + + Parameters + ---------- + id : string + Google Drive ID string. You can access this by clicking 'Get Shareable Link', + which will give a URL of the form + + destination : string or file + File to which to save the downloaded data + """ + response = _GET_google_drive(id) + _save_response_content(response, destination) + + +def download_url(url, destination): + """Download a file from a URL + + Parameters + ---------- + url : string + URL of file to be downloaded + destination : string or file + File to which to save the downloaded data + """ + if isinstance(destination, str): + with open(destination, 'wb') as handle: + download_url(url, handle) + else: + # destination is File + opener = urllib.request.build_opener() + opener.addheaders = _FAKE_HEADERS + urllib.request.install_opener(opener) + with urllib.request.urlopen(url) as handle: + destination.write(handle.read()) + + +def unzip(filename, destination=None, delete=True): + """Extract a .zip file and optionally remove the archived version + + Parameters + ---------- + filename : string + Path to the zip file + destination : string, optional (default: None) + Path to the folder in which to extract the zip. + If None, extracts to the same directory the archive is in. 
+ delete : boolean, optional (default: True) + If True, deletes the zip file after extraction + """ + filename = os.path.expanduser(filename) + if destination is None: + destination = os.path.dirname(filename) + elif not os.path.isdir(destination): + os.mkdir(destination) + with zipfile.ZipFile(filename, 'r') as handle: + handle.extractall(destination) + if delete: + os.unlink(filename) + + +def download_and_extract_zip(url, destination): + """Download a .zip file from a URL and extract it + + Parameters + ---------- + url : string + URL of file to be downloaded + destination : string + Directory in which to extract the downloaded zip + """ + if not os.path.isdir(destination): + os.mkdir(destination) + zip_handle = tempfile.NamedTemporaryFile(suffix=".zip", delete=False) + download_url(url, zip_handle) + zip_handle.close() + unzip(zip_handle.name, destination) diff --git a/scprep/io/fcs.py b/scprep/io/fcs.py index dd398f67..c0edf180 100644 --- a/scprep/io/fcs.py +++ b/scprep/io/fcs.py @@ -242,7 +242,7 @@ def load_fcs(filename, gene_names=True, cell_names=True, If `True`, we assume cell names are contained in the file. Otherwise expects a filename or an array containing a list of cell barcodes. sparse : bool, optional (default: None) - If True, loads the data as a pd.SparseDataFrame. This uses less memory + If True, loads the data as a pd.DataFrame[SparseArray]. This uses less memory but more CPU. metadata_channels : list-like, optional, shape=[n_meta] (default: ['Time', 'Event_length', 'DNA1', 'DNA2', 'Cisplatin', 'beadDist', 'bead1']) Channels to be excluded from the data @@ -273,7 +273,7 @@ def load_fcs(filename, gene_names=True, cell_names=True, Values from metadata channels data : array-like, shape=[n_samples, n_features] If either gene or cell names are given, data will be a pd.DataFrame or - pd.SparseDataFrame. If no names are given, data will be a np.ndarray + pd.DataFrame[SparseArray]. If no names are given, data will be a np.ndarray or scipy.sparse.spmatrix """ if cell_names is True: diff --git a/scprep/io/mtx.py b/scprep/io/mtx.py index eb4d114e..6f1bd2a8 100644 --- a/scprep/io/mtx.py +++ b/scprep/io/mtx.py @@ -2,7 +2,12 @@ # (C) 2018 Krishnaswamy Lab GPLv2 import scipy.io as sio +from scipy import sparse +import pandas as pd +import os + from .utils import _matrix_to_data_frame +from .. import utils def load_mtx(mtx_file, cell_axis='row', @@ -21,14 +26,14 @@ def load_mtx(mtx_file, cell_axis='row', cell_names : `str`, array-like, or `None` (default: None) Expects a filename or an array containing a list of cell barcodes. sparse : bool, optional (default: None) - If True, loads the data as a pd.SparseDataFrame. This uses less memory + If True, loads the data as a pd.DataFrame[pd.SparseArray]. This uses less memory but more CPU. Returns ------- data : array-like, shape=[n_samples, n_features] If either gene or cell names are given, data will be a pd.DataFrame or - pd.SparseDataFrame. If no names are given, data will be a np.ndarray + pd.DataFrame[pd.SparseArray]. 
If no names are given, data will be a np.ndarray
         or scipy.sparse.spmatrix
     """
     if cell_axis not in ['row', 'column', 'col']:
@@ -43,3 +48,50 @@
         data, gene_names=gene_names,
         cell_names=cell_names, sparse=sparse)
     return data
+
+def save_mtx(data, destination, cell_names=None, gene_names=None):
+    """Save a mtx file
+
+    Parameters
+    ----------
+    data : array-like, shape=[n_samples, n_features]
+        Input data, saved to destination/matrix.mtx
+    destination : str
+        Directory in which to save the data
+    cell_names : list-like, shape=[n_samples], optional (default: None)
+        Cell names associated with rows, saved to destination/cell_names.tsv.
+        If `data` is a pandas DataFrame and `cell_names` is None,
+        these are autopopulated from `data.index`.
+    gene_names : list-like, shape=[n_features], optional (default: None)
+        Gene names associated with columns, saved to destination/gene_names.tsv.
+        If `data` is a pandas DataFrame and `gene_names` is None,
+        these are autopopulated from `data.columns`.
+
+    Examples
+    --------
+    >>> import scprep
+    >>> scprep.io.save_mtx(data, destination="my_data")
+    >>> reload = scprep.io.load_mtx("my_data/matrix.mtx",
+    ...                             cell_names="my_data/cell_names.tsv",
+    ...                             gene_names="my_data/gene_names.tsv")
+    """
+    if isinstance(data, pd.DataFrame):
+        if cell_names is None:
+            cell_names = data.index
+        if gene_names is None:
+            gene_names = data.columns
+    data = utils.to_array_or_spmatrix(data)
+    data = sparse.coo_matrix(data)
+    # handle ~/ and relative paths
+    destination = os.path.expanduser(destination)
+    if not os.path.isdir(destination):
+        os.mkdir(destination)
+    if cell_names is not None:
+        with open(os.path.join(destination, "cell_names.tsv"), 'w') as handle:
+            for name in cell_names:
+                handle.write("{}\n".format(name))
+    if gene_names is not None:
+        with open(os.path.join(destination, "gene_names.tsv"), 'w') as handle:
+            for name in gene_names:
+                handle.write("{}\n".format(name))
+    sio.mmwrite(os.path.join(destination, "matrix.mtx"), data)
diff --git a/scprep/io/tenx.py b/scprep/io/tenx.py
index 6da8f90b..aaa8e8e8 100644
--- a/scprep/io/tenx.py
+++ b/scprep/io/tenx.py
@@ -40,18 +40,18 @@ def _combine_gene_id(symbols, ids):
 
 def _parse_10x_genes(symbols, ids, gene_labels='symbol',
                      allow_duplicates=True):
     assert gene_labels in ['symbol', 'id', 'both']
-    if gene_labels == 'both':
-        columns = _combine_gene_id(symbols, ids)
     if gene_labels == 'symbol':
         columns = symbols
         if not allow_duplicates and len(np.unique(columns)) < len(columns):
             warnings.warn(
-                "Duplicate gene names detected! Forcing `gene_labels='id'`. "
-                "Alternatively, try `gene_labels='both'`, "
+                "Duplicate gene names detected! Forcing `gene_labels='both'`. "
+                "Alternatively, try `gene_labels='id'`, "
                 "`allow_duplicates=True`, or load the matrix"
                 " with `sparse=False`", RuntimeWarning)
-            gene_labels = 'id'
-    if gene_labels == 'id':
+            gene_labels = 'both'
+    if gene_labels == 'both':
+        columns = _combine_gene_id(symbols, ids)
+    elif gene_labels == 'id':
         columns = ids
     return columns
@@ -95,7 +95,7 @@ def load_10X(data_dir, sparse=True, gene_labels='symbol',
     Returns
     -------
     data: array-like, shape=[n_samples, n_features]
-        If sparse, data will be a pd.SparseDataFrame. Otherwise, data will
+        If sparse, data will be a pd.DataFrame[pd.SparseArray]. Otherwise, data will
         be a pd.DataFrame.
     """
@@ -168,7 +168,7 @@ def load_10X_zip(filename, sparse=True, gene_labels='symbol',
     Returns
     -------
     data: array-like, shape=[n_samples, n_features]
-        If sparse, data will be a pd.SparseDataFrame. 
Otherwise, data will + If sparse, data will be a pd.DataFrame[pd.SparseArray]. Otherwise, data will be a pd.DataFrame. """ @@ -247,7 +247,7 @@ def load_10X_HDF5(filename, genome=None, sparse=True, gene_labels='symbol', Returns ------- data: array-like, shape=[n_samples, n_features] - If sparse, data will be a pd.SparseDataFrame. Otherwise, data will + If sparse, data will be a pd.DataFrame[pd.SparseArray]. Otherwise, data will be a pd.DataFrame. """ diff --git a/scprep/io/utils.py b/scprep/io/utils.py index 30a6eafa..53eb9f70 100644 --- a/scprep/io/utils.py +++ b/scprep/io/utils.py @@ -6,6 +6,8 @@ import warnings import numpy as np +from .. import utils + def _parse_header(header, n_expected, header_type="gene_names"): """ @@ -93,7 +95,7 @@ def _matrix_to_data_frame(data, gene_names=None, cell_names=None, sparse=None): # dataframe with index and/or columns if sparse is None: # let the input data decide - sparse = isinstance(data, pd.SparseDataFrame) or sp.issparse(data) + sparse = utils.is_sparse_dataframe(data) or sp.issparse(data) if sparse and gene_names is not None and \ len(np.unique(gene_names)) < len(gene_names): warnings.warn( @@ -101,18 +103,19 @@ def _matrix_to_data_frame(data, gene_names=None, cell_names=None, sparse=None): RuntimeWarning) sparse = False if sparse: - # return pandas.SparseDataFrame + # return pandas.DataFrame[SparseArray] if isinstance(data, pd.DataFrame): if gene_names is not None: data.columns = gene_names if cell_names is not None: data.index = cell_names - if not isinstance(data, pd.SparseDataFrame): - data = data.to_sparse(fill_value=0.0) + if not utils.is_sparse_dataframe(data): + data = utils.dataframe_to_sparse(data, fill_value=0.0) + elif sp.issparse(data): + data = pd.DataFrame.sparse.from_spmatrix(data, index=cell_names, columns=gene_names) else: - data = pd.SparseDataFrame(data, default_fill_value=0.0) - data.index = cell_names - data.columns = gene_names + data = pd.DataFrame(data, index=cell_names, columns=gene_names) + data = utils.dataframe_to_sparse(data, fill_value=0.0) else: # return pandas.DataFrame if isinstance(data, pd.DataFrame): @@ -120,8 +123,8 @@ def _matrix_to_data_frame(data, gene_names=None, cell_names=None, sparse=None): data.columns = gene_names if cell_names is not None: data.index = cell_names - if isinstance(data, pd.SparseDataFrame): - data = data.to_dense() + if utils.is_sparse_dataframe(data): + data = data.sparse.to_dense() else: if sp.issparse(data): data = data.toarray() diff --git a/scprep/measure.py b/scprep/measure.py index 27a18f8b..fda2347c 100644 --- a/scprep/measure.py +++ b/scprep/measure.py @@ -1,8 +1,11 @@ import numpy as np +import pandas as pd import warnings import numbers +import scipy.signal from . 
import utils, select +from ._lazyload import statsmodels def library_size(data): @@ -19,6 +22,8 @@ def library_size(data): Sum over all genes for each cell """ library_size = utils.matrix_sum(data, axis=1) + if isinstance(library_size, pd.Series): + library_size.name = 'library_size' return library_size @@ -59,45 +64,55 @@ def gene_set_expression(data, genes=None, library_size_normalize=False, gene_set_expression = library_size(gene_data) else: gene_set_expression = gene_data + if isinstance(gene_set_expression, pd.Series): + gene_set_expression.name = 'expression' return gene_set_expression -def _get_percentile_cutoff(data, cutoff=None, percentile=None, required=False): - """Get a cutoff for a dataset +@utils._with_pkg(pkg="statsmodels") +def gene_variability(data, span=0.7, interpolate=0.2, kernel_size=0.05, return_means=False): + """Measure the variability of each gene in a dataset + + Variability is computed as the deviation from a loess fit + to the rolling median of the mean-variance curve Parameters ---------- - data : array-like - cutoff : float or None, optional (default: None) - Absolute cutoff value. Only one of cutoff and percentile may be given - percentile : float or None, optional (default: None) - Percentile cutoff value between 0 and 100. - Only one of cutoff and percentile may be given - required : bool, optional (default: False) - If True, one of cutoff and percentile must be given. + data : array-like, shape=[n_samples, n_features] + Input data + span : float, optional (default: 0.7) + Fraction of genes to use when computing the loess estimate at each point + interpolate : float, optional (default: 0.2) + Multiple of the standard deviation of variances at which to interpolate + linearly in order to reduce computation time. + kernel_size : float or int, optional (default: 0.05) + Width of rolling median window. If a float between 0 and 1, the width is given by + kernel_size * data.shape[1]. Otherwise should be an odd integer + return_means : boolean, optional (default: False) + If True, return the gene means Returns ------- - cutoff : float or None - Absolute cutoff value. Can only be None if required is False and - cutoff and percentile are both None. + variability : list-like, shape=[n_samples] + Variability for each gene """ - if percentile is not None: - if cutoff is not None: - raise ValueError( - "Only one of `cutoff` and `percentile` should be given." - "Got cutoff={}, percentile={}".format(cutoff, percentile)) - if not isinstance(percentile, numbers.Number): - return [_get_percentile_cutoff(data, percentile=p) - for p in percentile] - if percentile < 1: - warnings.warn( - "`percentile` expects values between 0 and 100." - "Got {}. 
Did you mean {}?".format(percentile, - percentile * 100), - UserWarning) - cutoff = np.percentile(np.array(data).reshape(-1), percentile) - elif cutoff is None and required: - raise ValueError( - "One of either `cutoff` or `percentile` must be given.") - return cutoff + columns = data.columns if isinstance(data, pd.DataFrame) else None + data = utils.to_array_or_spmatrix(data) + data_std = utils.matrix_std(data, axis=0) ** 2 + if kernel_size < 1: + kernel_size = 2*(int(kernel_size * len(data_std))//2)+1 + order = np.argsort(data_std) + data_std_med = np.empty_like(data_std) + data_std_med[order] = scipy.signal.medfilt(data_std[order], kernel_size=kernel_size) + data_mean = utils.toarray(np.mean(data, axis=0)).flatten() + delta = np.std(data_std_med) * interpolate + lowess = statsmodels.nonparametric.smoothers_lowess.lowess( + data_std_med, data_mean, + delta=delta, frac=span, return_sorted=False) + result = data_std - lowess + if columns is not None: + result = pd.Series(result, index=columns, name='variability') + data_mean = pd.Series(data_mean, index=columns, name='mean') + if return_means: + result = result, data_mean + return result diff --git a/scprep/normalize.py b/scprep/normalize.py index 69501612..ef83852a 100644 --- a/scprep/normalize.py +++ b/scprep/normalize.py @@ -34,7 +34,7 @@ def _get_scaled_libsize(data, rescale='median', return_library_size=False): return rescale, libsize -def library_size_normalize(data, rescale='median', +def library_size_normalize(data, rescale=10000, return_library_size=False): """Performs L1 normalization on input data Performs L1 normalization on input data such that the sum of expression @@ -46,7 +46,7 @@ def library_size_normalize(data, rescale='median', ---------- data : array-like, shape=[n_samples, n_features] Input data - rescale : {'mean', 'median'}, float or `None`, optional (default: 'median') + rescale : {'mean', 'median'}, float or `None`, optional (default: 10000) Rescaling strategy. If 'mean' or 'median', normalized cells are scaled back up to the mean or median expression value. If a float, normalized cells are scaled up to the given value. If `None`, no @@ -64,13 +64,15 @@ def library_size_normalize(data, rescale='median', """ # pandas support columns, index = None, None - if isinstance(data, pd.SparseDataFrame) or \ - pd.api.types.is_sparse(data): + if isinstance(data, pd.DataFrame): columns, index = data.columns, data.index - data = data.to_coo() - elif isinstance(data, pd.DataFrame): - columns, index = data.columns, data.index - data = data.values + if utils.is_sparse_dataframe(data): + data = data.sparse.to_coo() + elif isinstance(data, pd.SparseDataFrame): + data = data.to_coo() + else: + # dense data + data = data.to_numpy() calc_libsize = sparse.issparse(data) and (return_library_size or data.nnz > 2**31) @@ -91,7 +93,7 @@ def library_size_normalize(data, rescale='median', if columns is not None: # pandas dataframe if sparse.issparse(data_norm): - data_norm = pd.SparseDataFrame(data_norm, default_fill_value=0.0) + data_norm = utils.SparseDataFrame(data_norm, default_fill_value=0.0) else: data_norm = pd.DataFrame(data_norm) data_norm.columns = columns @@ -120,7 +122,7 @@ def batch_mean_center(data, sample_idx=None): data : array-like, shape=[n_samples, n_features] Batch mean-centered output data. """ - if sparse.issparse(data) or isinstance(data, pd.SparseDataFrame): + if sparse.issparse(data) or isinstance(data, pd.SparseDataFrame) or utils.is_sparse_dataframe(data): raise ValueError("Cannot mean center sparse data. 
" "Convert to dense matrix first.") if sample_idx is None: diff --git a/scprep/plot/__init__.py b/scprep/plot/__init__.py index 1a7ca181..582a8a0d 100644 --- a/scprep/plot/__init__.py +++ b/scprep/plot/__init__.py @@ -3,4 +3,5 @@ from .marker import marker_plot from .scree import scree_plot from .jitter import jitter +from .variable_genes import plot_gene_variability from . import tools, colors diff --git a/scprep/plot/colors.py b/scprep/plot/colors.py index bdbc347b..4d688dd5 100644 --- a/scprep/plot/colors.py +++ b/scprep/plot/colors.py @@ -74,3 +74,48 @@ def tab40(): colors = np.vstack([mpl.cm.tab20c.colors, mpl.cm.tab20b.colors]) return mpl.colors.ListedColormap(colors) + + +def tab(n=10): + """A discrete colormap with an arbitrary number of colors + + This colormap chooses the best of the following, in order: + - `plt.cm.tab10` + - `plt.cm.tab20` + - `scprep.plot.colors.tab30` + - `scprep.plot.colors.tab40` + - `scprep.plot.colors.tab10_continuous` + + If the number of colors required is less than the number of colors + available, colors are selected specifically in order to reduce similarity + between selected colors. + + Parameters + ---------- + n : int, optional (default: 10) + Number of required colors. + + Returns + ------- + cmap : `matplotlib.colors.ListedColormap` + """ + if n < 1: + raise ValueError( + "Expected n >= 1. Got {}".format(n)) + n_shades = int(np.ceil(n / 10)) + if n_shades == 1: + cmap = mpl.cm.tab10 + elif n_shades == 2: + cmap = mpl.cm.tab20 + elif n_shades == 3: + cmap = tab30() + elif n_shades == 4: + cmap = tab40() + else: + cmap = tab10_continuous(n_colors=10, n_step=n_shades) + # restrict to n values + if n > 1 and n < cmap.N: + select_idx = np.tile(np.arange(10), n_shades) * \ + n_shades + np.repeat(np.arange(n_shades), 10) + cmap = mpl.colors.ListedColormap(np.array(cmap.colors)[select_idx[:n]]) + return cmap diff --git a/scprep/plot/histogram.py b/scprep/plot/histogram.py index 0dfb30df..e3c43dc8 100644 --- a/scprep/plot/histogram.py +++ b/scprep/plot/histogram.py @@ -1,5 +1,6 @@ import numpy as np import numbers +import warnings from .. import measure, utils from .utils import (_get_figure, show, @@ -18,6 +19,8 @@ def histogram(data, fontsize=None, histtype='stepfilled', alpha=None, + filename=None, + dpi=None, **kwargs): """Plot a histogram. @@ -55,6 +58,12 @@ def histogram(data, 'stepfilled' generates a lineplot that is by default filled. alpha : float, optional (default: 1 for a single dataset, 0.5 for multiple) Histogram transparency + filename : str or None (default: None) + file to which the output is saved + dpi : int or None, optional (default: None) + The resolution in dots per inch. If None it will default to the value + savefig.dpi in the matplotlibrc file. If 'figure' it will set the dpi + to be the value of the figure. Only used if filename is not None. **kwargs : additional arguments for `matplotlib.pyplot.hist` Returns @@ -78,8 +87,20 @@ def histogram(data, if alpha is None: alpha = 1 if log == 'x' or log is True: - bins = np.logspace(np.log10(max(xmin, 1)), - np.log10(xmax), + if xmax < np.finfo('float').eps: + raise ValueError("Expected positive data for log = {}. " + "Got max(data) = {:.2f}".format(log, xmax)) + elif xmin < np.finfo('float').eps: + warnings.warn("Expected positive data for log = {}. 
" + "Got min(data) = {:.2f}".format(log, xmin), UserWarning) + xmin = np.finfo('float').eps + xmin = np.log10(xmin) + xmax = np.log10(xmax) + xrange = max(xmax - xmin, 1) + xmin = xmin - xrange * 0.1 + xmax = xmax + xrange * 0.1 + bins = np.logspace(xmin, + xmax, bins) ax.hist(data, bins=bins, histtype=histtype, alpha=alpha, **kwargs) @@ -94,7 +115,7 @@ def histogram(data, if title is not None: ax.set_title(title, fontsize=parse_fontsize(None, 'xx-large')) - cutoff = measure._get_percentile_cutoff( + cutoff = utils._get_percentile_cutoff( data, cutoff, percentile, required=False) if cutoff is not None: if isinstance(cutoff, numbers.Number): @@ -102,8 +123,11 @@ def histogram(data, else: for c in cutoff: ax.axvline(c, color='red') + # save and show if show_fig: show(fig) + if filename is not None: + fig.savefig(filename, dpi=dpi) return ax @@ -115,6 +139,8 @@ def plot_library_size(data, xlabel='Library size', title=None, fontsize=None, + filename=None, + dpi=None, **kwargs): """Plot the library size histogram. @@ -144,6 +170,12 @@ def plot_library_size(data, Axis title. fontsize : float or None (default: None) Base font size. + filename : str or None (default: None) + file to which the output is saved + dpi : int or None, optional (default: None) + The resolution in dots per inch. If None it will default to the value + savefig.dpi in the matplotlibrc file. If 'figure' it will set the dpi + to be the value of the figure. Only used if filename is not None. **kwargs : additional arguments for `matplotlib.pyplot.hist` Returns @@ -161,7 +193,8 @@ def plot_library_size(data, return histogram(libsize, cutoff=cutoff, percentile=percentile, bins=bins, log=log, ax=ax, figsize=figsize, - xlabel=xlabel, title=title, fontsize=fontsize, **kwargs) + xlabel=xlabel, title=title, fontsize=fontsize, + filename=filename, dpi=dpi, **kwargs) @utils._with_pkg(pkg="matplotlib", min_version=3) @@ -175,6 +208,8 @@ def plot_gene_set_expression(data, genes=None, xlabel='Gene expression', title=None, fontsize=None, + filename=None, + dpi=None, **kwargs): """Plot the histogram of the expression of a gene set. @@ -216,6 +251,12 @@ def plot_gene_set_expression(data, genes=None, Axis title. fontsize : float or None (default: None) Base font size. + filename : str or None (default: None) + file to which the output is saved + dpi : int or None, optional (default: None) + The resolution in dots per inch. If None it will default to the value + savefig.dpi in the matplotlibrc file. If 'figure' it will set the dpi + to be the value of the figure. Only used if filename is not None. **kwargs : additional arguments for `matplotlib.pyplot.hist` Returns @@ -245,4 +286,5 @@ def plot_gene_set_expression(data, genes=None, return histogram(expression, cutoff=cutoff, percentile=percentile, bins=bins, log=log, ax=ax, figsize=figsize, - xlabel=xlabel, title=title, fontsize=fontsize, **kwargs) + xlabel=xlabel, title=title, fontsize=fontsize, + filename=filename, dpi=dpi, **kwargs) diff --git a/scprep/plot/jitter.py b/scprep/plot/jitter.py index 3b75f422..3e58ccd7 100644 --- a/scprep/plot/jitter.py +++ b/scprep/plot/jitter.py @@ -30,7 +30,7 @@ def x_coords(self): def jitter(labels, values, sigma=0.1, c=None, cmap=None, cmap_scale='linear', s=None, - plot_means=True, means_s=100, means_c='xkcd:light lavender', + plot_means=True, means_s=100, means_c='lightgrey', discrete=None, ax=None, legend=None, colorbar=None, @@ -84,7 +84,7 @@ def jitter(labels, values, sigma=0.1, If True, plot the mean value for each label. 
means_s : float, optional (default: 100) Point size for mean values. - means_c : string, list-like or matplotlib color, optional (default: 'xkcd:light lavender') + means_c : string, list-like or matplotlib color, optional (default: 'lightgrey') Point color(s) for mean values. discrete : bool or None, optional (default: None) If True, the legend is categorical. If False, the legend is a colorbar. @@ -113,13 +113,8 @@ def jitter(labels, values, sigma=0.1, If a list, sets custom axis tick labels {x,y}ticklabels : True, False, or list-like (default: None) If set, overrides `ticklabels` - label_prefix : str or None (default: None) - Prefix for all axis labels. Axes will be labelled `label_prefix`1, - `label_prefix`2, etc. Can be overriden by setting `xlabel`, - `ylabel`, and `zlabel`. {x,y}label : str or None (default : None) - Axis labels. Overrides the automatic label given by - label_prefix. If None and label_prefix is None, no label is set. + Axis labels. If None, no label is set. title : str or None (default: None) axis title. If None, no title is set. fontsize : float or None (default: None) @@ -157,7 +152,8 @@ def jitter(labels, values, sigma=0.1, labels, values, c=c, discrete=discrete, cmap=cmap, cmap_scale=cmap_scale, vmin=vmin, vmax=vmax, s=s, - legend=legend, colorbar=colorbar) + legend=legend, colorbar=colorbar, + xlabel=xlabel, ylabel=ylabel) fig, ax, show_fig = _get_figure( ax, figsize, subplot_kw=params.subplot_kw) @@ -190,9 +186,9 @@ def jitter(labels, values, sigma=0.1, xticklabels = params.x_labels # label axes - label_axis(ax.xaxis, xticks, xticklabels, xlabel) + label_axis(ax.xaxis, xticks, xticklabels, params.xlabel) label_axis(ax.yaxis, _with_default(yticks, ticks), - _with_default(yticklabels, ticklabels), ylabel) + _with_default(yticklabels, ticklabels), params.ylabel) # manually set x limits xmin = np.min(params.x_coords) @@ -216,8 +212,8 @@ def jitter(labels, values, sigma=0.1, scale=sc.norm) # save and show - if filename is not None: - fig.savefig(filename, dpi=dpi) if show_fig: show(fig) + if filename is not None: + fig.savefig(filename, dpi=dpi) return ax diff --git a/scprep/plot/scatter.py b/scprep/plot/scatter.py index 6821c049..9bfa5738 100644 --- a/scprep/plot/scatter.py +++ b/scprep/plot/scatter.py @@ -9,21 +9,34 @@ _with_default) from .tools import (create_colormap, create_normalize, label_axis, generate_colorbar, generate_legend) +from . 
import colors from .._lazyload import matplotlib as mpl plt = mpl.pyplot +def _squeeze_array(x): + x = utils.toarray([x]).squeeze() + try: + len(x) + except TypeError: + x = x[None] + return x + + class _ScatterParams(object): - def __init__(self, x, y, z=None, c=None, discrete=None, + def __init__(self, x, y, z=None, c=None, mask=None, + discrete=None, cmap=None, cmap_scale=None, vmin=None, vmax=None, s=None, legend=None, colorbar=None, - shuffle=True): - self._x = utils.toarray(x).squeeze() - self._y = utils.toarray(y).squeeze() - self._z = utils.toarray(z).squeeze() if z is not None else None + xlabel=None, ylabel=None, zlabel=None, + label_prefix=None, shuffle=True): + self._x = x + self._y = y + self._z = z if z is not None else None self._c = c + self._mask = mask self._discrete = discrete self._cmap = cmap self._cmap_scale = cmap_scale @@ -34,9 +47,14 @@ def __init__(self, x, y, z=None, c=None, discrete=None, self._colorbar = colorbar self._labels = None self._c_discrete = None + self._label_prefix = label_prefix + self._xlabel = xlabel + self._ylabel = ylabel + self._zlabel = zlabel self.shuffle = shuffle self.check_size() self.check_c() + self.check_mask() self.check_s() self.check_discrete() self.check_legend() @@ -44,32 +62,49 @@ def __init__(self, x, y, z=None, c=None, discrete=None, self.check_cmap_scale() self.check_vmin_vmax() + @property + def x_array(self): + return _squeeze_array(self._x) + + @property + def y_array(self): + return _squeeze_array(self._y) + + @property + def z_array(self): + return _squeeze_array(self._z) if self._z is not None else None + @property def size(self): - return len(self._x) + try: + return self._size + except AttributeError: + self._size = len(self.x_array) + return self._size @property def plot_idx(self): try: return self._plot_idx except AttributeError: + self._plot_idx = np.arange(self.size) + if self._mask is not None: + self._plot_idx = self._plot_idx[self._mask] if self.shuffle: - self._plot_idx = np.random.permutation(self.size) - else: - self._plot_idx = np.arange(self.size) + self._plot_idx = np.random.permutation(self._plot_idx) return self._plot_idx @property def x(self): - return self._x[self.plot_idx] + return self.x_array[self.plot_idx] @property def y(self): - return self._y[self.plot_idx] + return self.y_array[self.plot_idx] @property def z(self): - return self._z[self.plot_idx] if self._z is not None else None + return self.z_array[self.plot_idx] if self._z is not None else None @property def data(self): @@ -81,9 +116,9 @@ def data(self): @property def _data(self): if self._z is not None: - return [self._x, self._y, self._z] + return [self.x_array, self.y_array, self.z_array] else: - return [self._x, self._y] + return [self.x_array, self.y_array] @property def s(self): @@ -110,13 +145,20 @@ def array_c(self): self._c) return self._array_c + @property + def _c_masked(self): + if self.constant_c() or self._mask is None: + return self._c + else: + return self._c[self._mask] + @property def c_unique(self): """Get unique values in c to avoid recomputing every time""" try: return self._c_unique except AttributeError: - self._c_unique = np.unique(self._c) + self._c_unique = np.unique(self._c_masked) return self._c_unique @property @@ -145,7 +187,7 @@ def discrete(self): else: if isinstance(self._cmap, dict) or not \ np.all([isinstance(x, numbers.Number) - for x in self._c]): + for x in self._c_masked]): # cmap dictionary or non-numeric values force discrete return True else: @@ -171,8 +213,9 @@ def c_discrete(self): for i, label in 
enumerate(self._labels): self._c_discrete[self._c == label] = i else: - self._c_discrete, self._labels = pd.factorize( - self._c, sort=True) + self._c_discrete = np.zeros_like(self._c, dtype=int) + self._c_discrete[self._mask], self._labels = pd.factorize( + self._c_masked, sort=True) return self._c_discrete @property @@ -215,7 +258,7 @@ def vmin(self): if self.constant_c() or self.array_c() or self.discrete: return None else: - return np.min(self.c) + return np.nanmin(self.c) @property def vmax(self): @@ -225,7 +268,7 @@ def vmax(self): if self.constant_c() or self.array_c() or self.discrete: return None else: - return np.max(self.c) + return np.nanmax(self.c) def list_cmap(self): """Is the colormap a list?""" @@ -257,11 +300,7 @@ def cmap(self): if self.constant_c() or self.array_c(): return None elif self.discrete: - n_unique_colors = self.n_c_unique - if n_unique_colors <= 10: - return self.process_string_cmap('tab10') - else: - return self.process_string_cmap('tab20') + return colors.tab(n=self.n_c_unique) else: return self.process_string_cmap('inferno') @@ -354,14 +393,21 @@ def check_size(self): def check_c(self): if not self.constant_c(): - self._c = utils.toarray(self._c).squeeze() + self._c = _squeeze_array(self._c) if not len(self._c) == self.size: raise ValueError("Expected c of length {} or 1. Got {}".format( self.size, len(self._c))) + def check_mask(self): + if self._mask is not None: + self._mask = _squeeze_array(self._mask) + if not len(self._mask) == self.size: + raise ValueError("Expected mask of length {}. Got {}".format( + self.size, len(self._mask))) + def check_s(self): if self._s is not None and not isinstance(self._s, numbers.Number): - self._s = utils.toarray(self._s).squeeze() + self._s = _squeeze_array(self._s) if not len(self._s) == self.size: raise ValueError("Expected s of length {} or 1. Got {}".format( self.size, len(self._s))) @@ -414,10 +460,46 @@ def check_cmap_scale(self): UserWarning) self._cmap_scale = 'linear' + @property + def xlabel(self): + if self._xlabel is not None: + return self._xlabel + elif self._label_prefix is not None: + return self._label_prefix + "1" + elif isinstance(self._x, pd.Series): + return self._x.name + else: + return None + + @property + def ylabel(self): + if self._ylabel is not None: + return self._ylabel + elif self._label_prefix is not None: + return self._label_prefix + "2" + elif isinstance(self._y, pd.Series): + return self._y.name + else: + return None + + @property + def zlabel(self): + if self._z is None: + return None + elif self._zlabel is not None: + return self._zlabel + elif self._label_prefix is not None: + return self._label_prefix + "3" + elif isinstance(self._z, pd.Series): + return self._z.name + else: + return None + @utils._with_pkg(pkg="matplotlib", min_version=3) -def scatter(x, y, z=None, - c=None, cmap=None, cmap_scale='linear', s=None, discrete=None, +def scatter(x, y, z=None, mask=None, + c=None, cmap=None, cmap_scale='linear', s=None, + discrete=None, ax=None, legend=None, colorbar=None, shuffle=True, @@ -458,6 +540,8 @@ def scatter(x, y, z=None, data for y axis z : list-like, optional (default: None) data for z axis + mask : list-like, optional (default: None) + boolean mask to hide data points c : list-like or None, optional (default: None) Color vector. 
Can be a single color value (RGB, RGBA, or named matplotlib colors), an array of these of length n_samples, or a list of @@ -560,11 +644,12 @@ def scatter(x, y, z=None, """ with temp_fontsize(fontsize): params = _ScatterParams( - x, y, z, c=c, discrete=discrete, + x, y, z, c=c, mask=mask, discrete=discrete, cmap=cmap, cmap_scale=cmap_scale, vmin=vmin, vmax=vmax, s=s, legend=legend, colorbar=colorbar, - shuffle=shuffle) + xlabel=xlabel, ylabel=ylabel, zlabel=zlabel, + label_prefix=label_prefix, shuffle=shuffle) fig, ax, show_fig = _get_figure( ax, figsize, subplot_kw=params.subplot_kw) @@ -575,23 +660,14 @@ def scatter(x, y, z=None, c=params.c, cmap=params.cmap, norm=params.norm, s=params.s, vmin=params.vmin, vmax=params.vmax, **plot_kwargs) - # automatic axis labels - if label_prefix is not None: - if xlabel is None: - xlabel = label_prefix + "1" - if ylabel is None: - ylabel = label_prefix + "2" - if zlabel is None: - zlabel = label_prefix + "3" - # label axes label_axis(ax.xaxis, _with_default(xticks, ticks), - _with_default(xticklabels, ticklabels), xlabel) + _with_default(xticklabels, ticklabels), params.xlabel) label_axis(ax.yaxis, _with_default(yticks, ticks), - _with_default(yticklabels, ticklabels), ylabel) + _with_default(yticklabels, ticklabels), params.ylabel) if z is not None: label_axis(ax.zaxis, _with_default(zticks, ticks), - _with_default(zticklabels, ticklabels), zlabel) + _with_default(zticklabels, ticklabels), params.zlabel) if title is not None: ax.set_title(title, fontsize=parse_fontsize(None, 'xx-large')) @@ -614,15 +690,15 @@ def scatter(x, y, z=None, ax.view_init(elev=elev, azim=azim) # save and show - if filename is not None: - fig.savefig(filename, dpi=dpi) if show_fig: show(fig) + if filename is not None: + fig.savefig(filename, dpi=dpi) return ax @utils._with_pkg(pkg="matplotlib", min_version=3) -def scatter2d(data, +def scatter2d(data, mask=None, c=None, cmap=None, cmap_scale='linear', s=None, discrete=None, ax=None, legend=None, colorbar=None, shuffle=True, figsize=None, @@ -652,6 +728,8 @@ def scatter2d(data, ---------- data : array-like, shape=[n_samples, n_features] Input data. Only the first two components will be used. + mask : list-like, optional (default: None) + boolean mask to hide data points c : list-like or None, optional (default: None) Color vector. Can be a single color value (RGB, RGBA, or named matplotlib colors), an array of these of length n_samples, or a list of @@ -749,8 +827,11 @@ def scatter2d(data, >>> data[colors == 'a'] += 10 >>> scprep.plot.scatter2d(data, c=colors, cmap={'a' : [1,0,0,1], 'b' : 'xkcd:sky blue'}) """ + if isinstance(data, list): + data = utils.toarray(data) return scatter(x=select.select_cols(data, idx=0), y=select.select_cols(data, idx=1), + mask=mask, c=c, cmap=cmap, cmap_scale=cmap_scale, s=s, discrete=discrete, ax=ax, legend=legend, colorbar=colorbar, shuffle=shuffle, figsize=figsize, @@ -774,7 +855,7 @@ def scatter2d(data, @utils._with_pkg(pkg="matplotlib", min_version=3) -def scatter3d(data, +def scatter3d(data, mask=None, c=None, cmap=None, cmap_scale='linear', s=None, discrete=None, ax=None, legend=None, colorbar=None, shuffle=True, @@ -809,6 +890,8 @@ def scatter3d(data, ---------- data : array-like, shape=[n_samples, n_features] Input data. Only the first two components will be used. + mask : list-like, optional (default: None) + boolean mask to hide data points c : list-like or None, optional (default: None) Color vector. 
        Can be a single color value (RGB, RGBA, or named
        matplotlib colors), an array of these of length n_samples, or a list of
@@ -910,9 +993,15 @@ def scatter3d(data,
     >>> data[colors == 'a'] += 5
     >>> scprep.plot.scatter3d(data, c=colors, cmap={'a' : [1,0,0,1], 'b' : 'xkcd:sky blue'})
     """
-    return scatter(x=select.select_cols(data, idx=0),
-                   y=select.select_cols(data, idx=1),
-                   z=select.select_cols(data, idx=2),
+    if isinstance(data, list):
+        data = utils.toarray(data)
+    try:
+        x = select.select_cols(data, idx=0)
+        y = select.select_cols(data, idx=1)
+        z = select.select_cols(data, idx=2)
+    except IndexError:
+        raise ValueError("Expected data.shape[1] >= 3. Got {}".format(data.shape[1]))
+    return scatter(x=x, y=y, z=z, mask=mask,
                    c=c, cmap=cmap, cmap_scale=cmap_scale, s=s, discrete=discrete,
                    ax=ax, legend=legend, colorbar=colorbar,
                    shuffle=shuffle, figsize=figsize,
@@ -977,7 +1066,7 @@ def rotate_scatter3d(data,
        savefig.dpi in the matplotlibrc file. If 'figure' it will set the dpi
        to be the value of the figure. Only used if filename is not None.
    **kwargs : keyword arguments
-        See :~func:`phate.plot.scatter3d`.
+        See :func:`~scprep.plot.scatter3d`.

    Returns
    -------
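Reviewer's note: the new `mask` argument threads through `scatter`, `scatter2d`, and `scatter3d`, and pairs with the `np.nanmin`/`np.nanmax` changes above so that hidden points cannot distort the color scale. A minimal sketch of the intended usage once this patch is applied (toy data; the variable names are illustrative):

```python
import numpy as np
import scprep

data = np.random.normal(size=(100, 2))  # toy 2D embedding
values = data[:, 0]                     # per-point value to color by

# `mask` takes a boolean vector of length n_samples; per the new docstring
# it hides data points (assuming True means "keep", only points with a
# positive value are drawn here)
scprep.plot.scatter2d(data, c=values, mask=values > 0,
                      label_prefix="PC", legend_title="value")
```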
diff --git a/scprep/plot/scree.py b/scprep/plot/scree.py
index fa98b2f6..fc693798 100644
--- a/scprep/plot/scree.py
+++ b/scprep/plot/scree.py
@@ -1,6 +1,7 @@
 import numpy as np

 from .. import utils
+from .._lazyload import matplotlib as mpl
 from .utils import (_get_figure, show,
                     temp_fontsize)
@@ -10,7 +11,7 @@
 @utils._with_pkg(pkg="matplotlib", min_version=3)
 def scree_plot(singular_values, cumulative=False, ax=None, figsize=None,
                xlabel='Principal Component', ylabel='Explained Variance (%)',
-               fontsize=None,
+               fontsize=None, filename=None, dpi=None,
                **kwargs):
     """Plot the explained variance of each principal component
@@ -28,6 +29,12 @@ def scree_plot(singular_values, cumulative=False, ax=None, figsize=None,
         Labels to display on the x and y axis.
     fontsize : float or None (default: None)
         Base font size.
+    filename : str or None (default: None)
+        file to which the output is saved
+    dpi : int or None, optional (default: None)
+        The resolution in dots per inch. If None it will default to the value
+        savefig.dpi in the matplotlibrc file. If 'figure' it will set the dpi
+        to be the value of the figure. Only used if filename is not None.
-    **kwargs : additional arguments for `matplotlib.pyplot.plot`
+    **kwargs : additional arguments for `matplotlib.pyplot.bar`

     Returns
@@ -46,14 +53,18 @@
     """
     with temp_fontsize(fontsize):
         explained_variance = singular_values ** 2
-        explained_variance = explained_variance / explained_variance.sum()
+        explained_variance = explained_variance / explained_variance.sum() * 100
         if cumulative:
             explained_variance = np.cumsum(explained_variance)
         fig, ax, show_fig = _get_figure(ax, figsize)
-        ax.plot(np.arange(len(explained_variance)),
-                explained_variance, **kwargs)
+        ax.bar(np.arange(len(explained_variance)) + 1,
+               explained_variance, **kwargs)
         label_axis(ax.xaxis, label=xlabel)
         label_axis(ax.yaxis, label=ylabel)
+        ax.xaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))
+        ax.set_xlim(0.3, len(explained_variance) + 0.7)
         if show_fig:
             show(fig)
+    if filename is not None:
+        fig.savefig(filename, dpi=dpi)
     return ax
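Since `scree_plot` now reports explained variance in percent and draws 1-indexed bars, a quick sketch of the updated call pattern, combined with `return_singular_values` from `scprep.reduce.pca` (toy data; shapes are illustrative):

```python
import numpy as np
import scprep

data = np.random.normal(size=(200, 50))  # toy expression matrix
data_pca, singular_values = scprep.reduce.pca(
    data, n_components=10, return_singular_values=True)

# y axis is now percent explained variance; one bar per component
scprep.plot.scree_plot(singular_values, cumulative=True,
                       filename="scree.png", dpi=150)
```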
diff --git a/scprep/plot/utils.py b/scprep/plot/utils.py
index 900aa3fb..f0d77e92 100644
--- a/scprep/plot/utils.py
+++ b/scprep/plot/utils.py
@@ -82,8 +82,8 @@ def show(fig):
     fig : matplotlib.Figure
         Figure to show
     """
+    fig.tight_layout()
     if _mpl_is_gui_backend():
-        fig.tight_layout()
         if platform.system() == "Windows":
             plt.show(block=True)
         else:
diff --git a/scprep/plot/variable_genes.py b/scprep/plot/variable_genes.py
new file mode 100644
index 00000000..c5fc6b43
--- /dev/null
+++ b/scprep/plot/variable_genes.py
@@ -0,0 +1,70 @@
+from .scatter import scatter
+from .. import utils, measure
+
+
+@utils._with_pkg(pkg="matplotlib", min_version=3)
+def plot_gene_variability(data, span=0.7, interpolate=0.2, kernel_size=0.05,
+                          cutoff=None, percentile=90,
+                          ax=None, figsize=None,
+                          xlabel='Gene mean',
+                          ylabel='Standardized variance',
+                          title=None,
+                          fontsize=None,
+                          filename=None,
+                          dpi=None, **kwargs):
+    """Plot the variability of each gene against its mean expression
+
+    Variability is computed as the deviation from a loess fit
+    to the rolling median of the mean-variance curve
+
+    Parameters
+    ----------
+    data : array-like, shape=[n_samples, n_features]
+        Input data. Multiple datasets may be given as a list of array-likes.
+    span : float, optional (default: 0.7)
+        Fraction of genes to use when computing the loess estimate at each point
+    interpolate : float, optional (default: 0.2)
+        Multiple of the standard deviation of variances at which to interpolate
+        linearly in order to reduce computation time.
+    kernel_size : float or int, optional (default: 0.05)
+        Width of rolling median window. If a float, the width is given by
+        kernel_size * data.shape[1]
+    cutoff : float or `None`, optional (default: `None`)
+        Absolute variability cutoff above which genes are highlighted.
+        Only one of `cutoff` and `percentile` may be given.
+    percentile : float or `None`, optional (default: 90)
+        Percentile of variability between 0 and 100 above which genes are
+        highlighted. Only one of `cutoff` and `percentile` may be given.
+    ax : `matplotlib.Axes` or None, optional (default: None)
+        Axis to plot on. If None, a new axis will be created.
+    figsize : tuple or None, optional (default: None)
+        If not None, sets the figure size (width, height)
+    [x,y]label : str, optional
+        Labels to display on the x and y axis.
+    title : str or None, optional (default: None)
+        Axis title.
+    fontsize : float or None (default: None)
+        Base font size.
+    filename : str or None (default: None)
+        file to which the output is saved
+    dpi : int or None, optional (default: None)
+        The resolution in dots per inch. If None it will default to the value
+        savefig.dpi in the matplotlibrc file. If 'figure' it will set the dpi
+        to be the value of the figure. Only used if filename is not None.
+    **kwargs : additional arguments for `scprep.plot.scatter`
+
+    Returns
+    -------
+    ax : `matplotlib.Axes`
+        axis on which plot was drawn
+    """
+    variability, means = measure.gene_variability(data, span=span, interpolate=interpolate,
+                                                  kernel_size=kernel_size, return_means=True)
+    keep_cells_idx = utils._get_filter_idx(variability,
+                                           cutoff, percentile,
+                                           keep_cells='above')
+    return scatter(means, variability, c=keep_cells_idx,
+                   cmap={True : 'red', False : 'black'},
+                   xlabel=xlabel, ylabel=ylabel, title=title,
+                   fontsize=fontsize, filename=filename, dpi=dpi,
+                   **kwargs)
diff --git a/scprep/reduce.py b/scprep/reduce.py
index 028d36c6..089eaa33 100644
--- a/scprep/reduce.py
+++ b/scprep/reduce.py
@@ -238,11 +238,12 @@ def pca(data, n_components=100, eps=0.3,
         Parameter to control the quality of the embedding of sparse input.
         Smaller values lead to more accurate embeddings but higher
         computational and memory costs
-    method : {'svd', 'orth_rproj', 'rproj'}, optional (default: 'svd')
+    method : {'svd', 'orth_rproj', 'rproj', 'dense'}, optional (default: 'svd')
         Dimensionality reduction method applied prior to mean centering
         of sparse input. The method choice affects accuracy
-        (`svd` > `orth_rproj` > `rproj`) comes with increased
-        computational cost (but not memory.)
+        (`svd` > `orth_rproj` > `rproj`) and comes with increased
+        computational cost (but not memory). On the other hand,
+        `method='dense'` adds a memory cost but is faster.
     seed : int, RandomState or None, optional (default: None)
         Random state.
     return_singular_values : bool, optional (default: False)
@@ -277,10 +278,14 @@
                 n_components, min(data.shape)))

     # handle dataframes
-    if isinstance(data, pd.SparseDataFrame):
-        data = data.to_coo()
-    elif isinstance(data, pd.DataFrame):
-        data = data.values
+    if isinstance(data, pd.DataFrame):
+        index = data.index
+    else:
+        index = None
+    if method == 'dense':
+        data = utils.toarray(data)
+    else:
+        data = utils.to_array_or_spmatrix(data)

     # handle sparsity
     if sparse.issparse(data):
@@ -299,6 +304,10 @@
         pca_op = decomposition.PCA(n_components, random_state=seed)
         data = pca_op.fit_transform(data)

+    if index is not None:
+        data = pd.DataFrame(data, index=index,
+                            columns=["PC{}".format(i+1) for i in range(n_components)])
+
     if return_singular_values:
         data = (data, pca_op.singular_values_)
     return data
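Worth flagging for reviewers: `pca` now round-trips pandas input, returning a DataFrame with the original index and `PC1`...`PCn` column names. A minimal sketch (toy DataFrame; names are illustrative):

```python
import numpy as np
import pandas as pd
import scprep

df = pd.DataFrame(np.random.normal(size=(100, 20)),
                  index=["cell_{}".format(i) for i in range(100)])
df_pca = scprep.reduce.pca(df, n_components=5)

print(df_pca.columns.tolist())  # ['PC1', 'PC2', 'PC3', 'PC4', 'PC5']
print(df_pca.index[0])          # 'cell_0' -- the index is preserved
```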
diff --git a/scprep/run/__init__.py b/scprep/run/__init__.py
index 220af9f1..0a07c96e 100644
--- a/scprep/run/__init__.py
+++ b/scprep/run/__init__.py
@@ -1,2 +1,3 @@
-from .r_function import RFunction
+from .r_function import RFunction, install_bioconductor
 from .splatter import SplatSimulate
+from .slingshot import Slingshot
diff --git a/scprep/run/r_function.py b/scprep/run/r_function.py
index ff644200..12df8154 100644
--- a/scprep/run/r_function.py
+++ b/scprep/run/r_function.py
@@ -138,3 +138,52 @@ def __call__(self, *args, rpy_verbose=None, **kwargs):
         robject = self.convert(robject)
         self.verbose = default_verbose
         return robject
+
+
+_install_bioconductor = RFunction(
+    args="package = character(), site_repository = character(), update = FALSE, version = BiocManager::version()",
+    body="""
+    if (!require('BiocManager')) install.packages("BiocManager")
+    ask <- !update
+    if (length(package) == 0) {
+        BiocManager::install(site_repository=site_repository,
+                             update=update, ask=ask, version=version)
+    } else {
+        for (pkg in package) {
+            if (update || !require(pkg, character.only = TRUE)) {
+                BiocManager::install(pkg, site_repository=site_repository,
+                                     update=update, ask=ask, version=version)
+            }
+        }
+    }
+    """)
+
+def install_bioconductor(package = None, site_repository = None, update = False, version = None, verbose = True):
+    """Install a Bioconductor package
+
+    Parameters
+    ----------
+    package : string or None, optional (default: None)
+        name of the Bioconductor package to install. If None,
+        installs or updates the core Bioconductor packages instead.
+    site_repository : string, optional (default: None)
+        additional repository in which to look for packages to install.
+        This repository will be prepended to the default repositories
+    update : boolean, optional (default: False)
+        When False, don't attempt to update old packages.
+        When True, update old packages automatically.
+    version : string, optional (default: None)
+        Bioconductor version to install, e.g., version = "3.8".
+        The special symbol version = "devel" installs the current 'development' version.
+        If None, installs from the current version.
+    verbose : boolean, optional (default: True)
+        Install script verbosity.
+    """
+    kwargs = {'update': update, 'rpy_verbose': verbose}
+    if package is not None:
+        kwargs['package'] = package
+    if site_repository is not None:
+        kwargs['site_repository'] = site_repository
+    if version is not None:
+        kwargs['version'] = version
+    _install_bioconductor(**kwargs)
\ No newline at end of file
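For orientation, a sketch of the new installer hook as called from Python (requires a working R installation plus rpy2; the package names below are just examples):

```python
import scprep

# one-off setup: install a Bioconductor package into the local R library
scprep.run.install_bioconductor('splatter', verbose=True)

# or pin a specific Bioconductor release
scprep.run.install_bioconductor('slingshot', version='3.8')
```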
+ """ + r_function.install_bioconductor( + 'slingshot', site_repository=site_repository, + update=update, version=version, verbose=verbose) + + +_Slingshot = r_function.RFunction( + setup=""" + library(slingshot) + """, + args=""" + data, cluster_labels, + start_cluster = NULL, end_cluster = NULL, + distance = NULL, omega = NULL, lineages = list(), shrink = TRUE, + extend = "y", reweight = TRUE, reassign = TRUE, thresh = 0.001, + max_iter = 15, stretch = 2, + smoother = "smooth.spline", shrink_method = "cosine", + allow_breaks = TRUE, seed = NULL + """, + body=""" + set.seed(seed) + data <- as.matrix(data) + cluster_labels <- as.factor(cluster_labels) + + # Run Slingshot + sling <- slingshot(data, clusterLabels = cluster_labels, + start.clus = start_cluster, end.clus = end_cluster, + dist.fun = distance, omega = omega, lineages = lineages, shrink = shrink, + extend = extend, reweight = reweight, reassign = reassign, thresh = thresh, + maxit = max_iter, stretch = stretch, + smoother = smoother, shrink.method = shrink_method, + allow.breaks = allow_breaks) + list(pseudotime = slingPseudotime(sling), + curves = lapply(sling@curves, function(curve) curve$s[curve$ord,])) + """) + + +def Slingshot( + data, cluster_labels, + start_cluster = None, end_cluster = None, + distance = None, omega = None, shrink = True, + extend = "y", reweight = True, reassign = True, thresh = 0.001, + max_iter = 15, stretch = 2, + smoother = "smooth.spline", shrink_method = "cosine", + allow_breaks = True, + seed=None, verbose=1): + """Perform lineage inference with Slingshot + + Given a reduced-dimensional data matrix n by p and a vector of cluster labels + (or matrix of soft cluster assignments, potentially including a -1 label for "unclustered"), + this function performs lineage inference using a cluster-based minimum spanning tree and + constructing simulatenous principal curves for branching paths through the tree. + + For more details, read about Slingshot on [GitHub](https://github.com/kstreet13/slingshot) + and [Bioconductor](https://bioconductor.org/packages/release/bioc/html/slingshot.html). + + Parameters + ---------- + data : array-like, shape=[n_samples, n_dimensions] + matrix of (reduced dimension) coordinates + to be used for lineage inference. + cluster_labels : list-like, shape=[n_samples] + a vector of cluster labels, optionally including -1's for "unclustered." + start_cluster : string, optional (default: None) + indicates the cluster(s) of origin. + Lineages will be represented by paths coming out of this cluster. + end_cluster : string, optional (default: None) + indicates the cluster(s) which will be forced leaf nodes. + This introduces a constraint on the MST algorithm. + distance : callable, optional (default: None) + method for calculating distances between clusters. + Must take two matrices as input, corresponding to subsets of reduced_dim. + If the minimum cluster size is larger than the number dimensions, + the default is to use the joint covariance matrix to find squared distance + between cluster centers. If not, the default is to use the diagonal of the + joint covariance matrix. Not currently implemented + omega : float, optional (default: None) + this granularity parameter determines the distance between every + real cluster and the artificial cluster. + It is parameterized such that this distance is omega / 2, + making omega the maximum distance between two connected clusters. + By default, omega = Inf. 
+
+
+def Slingshot(
+        data, cluster_labels,
+        start_cluster = None, end_cluster = None,
+        distance = None, omega = None, shrink = True,
+        extend = "y", reweight = True, reassign = True, thresh = 0.001,
+        max_iter = 15, stretch = 2,
+        smoother = "smooth.spline", shrink_method = "cosine",
+        allow_breaks = True,
+        seed=None, verbose=1):
+    """Perform lineage inference with Slingshot
+
+    Given a reduced-dimensional data matrix n by p and a vector of cluster labels
+    (or matrix of soft cluster assignments, potentially including a -1 label for "unclustered"),
+    this function performs lineage inference using a cluster-based minimum spanning tree,
+    constructing simultaneous principal curves for branching paths through the tree.
+
+    For more details, read about Slingshot on [GitHub](https://github.com/kstreet13/slingshot)
+    and [Bioconductor](https://bioconductor.org/packages/release/bioc/html/slingshot.html).
+
+    Parameters
+    ----------
+    data : array-like, shape=[n_samples, n_dimensions]
+        matrix of (reduced dimension) coordinates
+        to be used for lineage inference.
+    cluster_labels : list-like, shape=[n_samples]
+        a vector of cluster labels, optionally including -1's for "unclustered."
+    start_cluster : string, optional (default: None)
+        indicates the cluster(s) of origin.
+        Lineages will be represented by paths coming out of this cluster.
+    end_cluster : string, optional (default: None)
+        indicates the cluster(s) which will be forced leaf nodes.
+        This introduces a constraint on the MST algorithm.
+    distance : callable, optional (default: None)
+        method for calculating distances between clusters.
+        Must take two matrices as input, corresponding to subsets of reduced_dim.
+        If the minimum cluster size is larger than the number of dimensions,
+        the default is to use the joint covariance matrix to find squared distance
+        between cluster centers. If not, the default is to use the diagonal of the
+        joint covariance matrix. Not currently implemented.
+    omega : float, optional (default: None)
+        this granularity parameter determines the distance between every
+        real cluster and the artificial cluster.
+        It is parameterized such that this distance is omega / 2,
+        making omega the maximum distance between two connected clusters.
+        By default, omega = Inf.
+    shrink : boolean or float, optional (default: True)
+        boolean or numeric between 0 and 1, determines whether and how much to shrink
+        branching lineages toward their average prior to the split.
+    extend : {'y', 'n', 'pc1'}, optional (default: "y")
+        how to handle root and leaf clusters of lineages when
+        constructing the initial, piece-wise linear curve.
+    reweight : boolean, optional (default: True)
+        whether to allow cells shared between lineages to be reweighted during curve-fitting.
+        If True, cells shared between lineages will be iteratively
+        reweighted based on the quantiles of their projection distances to each curve.
+    reassign : boolean, optional (default: True)
+        whether to reassign cells to lineages at each iteration.
+        If True, cells will be added to a lineage when their
+        projection distance to the curve is less than the median
+        distance for all cells currently assigned to the lineage.
+        Additionally, shared cells will be removed from a lineage if
+        their projection distance to the curve is above the 90th
+        percentile and their weight along the curve is less than 0.1.
+    thresh : float, optional (default: 0.001)
+        determines the convergence criterion. Percent change in the
+        total distance from cells to their projections along curves
+        must be less than thresh.
+    max_iter : int, optional (default: 15)
+        maximum number of iterations
+    stretch : int, optional (default: 2)
+        factor between 0 and 2 by which curves can be extrapolated beyond endpoints
+    smoother : {"smooth.spline", "lowess", "periodic_lowess"}, optional (default: "smooth.spline")
+        choice of smoother. "periodic_lowess" allows one to fit closed
+        curves. Beware, you may want to use iter = 0 with "lowess".
+    shrink_method : string, optional (default: "cosine")
+        how to determine the appropriate amount of shrinkage for a
+        branching lineage. Accepted values: "gaussian", "rectangular",
+        "triangular", "epanechnikov", "biweight", "triweight",
+        "cosine", "optcosine", "density".
+    allow_breaks : boolean, optional (default: True)
+        determines whether curves that branch very close to the origin
+        should be allowed to have different starting points.
+    seed : int or None, optional (default: None)
+        Seed to use for generating random numbers.
+    verbose : int, optional (default: 1)
+        Logging verbosity between 0 and 2.
+
+    Returns
+    -------
+    slingshot : dict
+        Contains the following keys:
+        pseudotime : array-like, shape=[n_samples, n_curves]
+            Pseudotime projection of each cell onto each principal curve.
+            Value is `np.nan` if the cell does not lie on the curve
+        branch : list-like, shape=[n_samples]
+            Branch assignment for each cell
+        curves : array-like, shape=[n_curves, n_samples, n_dimensions]
+            Coordinates of each principal curve in the reduced dimension
+
+    Examples
+    --------
+    >>> import scprep
+    >>> import phate
+    >>> data, clusters = phate.tree.gen_dla(n_branch=4, n_dim=200, branch_length=200)
+    >>> phate_op = phate.PHATE()
+    >>> data_phate = phate_op.fit_transform(data)
+    >>> slingshot = scprep.run.Slingshot(data_phate, clusters)
+    >>> ax = scprep.plot.scatter2d(data_phate, c=slingshot['pseudotime'][:,0], cmap='magma', legend_title='Branch 1')
+    >>> scprep.plot.scatter2d(data_phate, c=slingshot['pseudotime'][:,1], cmap='viridis', ax=ax,
+    ...                       ticks=False, label_prefix='PHATE', legend_title='Branch 2')
+    >>> for curve in slingshot['curves']:
+    ...     ax.plot(curve[:,0], curve[:,1], c='black')
+    >>> ax = scprep.plot.scatter2d(data_phate, c=slingshot['branch'], legend_title='Branch',
+    ...
ticks=False, label_prefix='PHATE') + >>> for curve in slingshot['curves']: + ... ax.plot(curve[:,0], curve[:,1], c='black') + """ + if seed is None: + seed = np.random.randint(2**16 - 1) + if distance is not None: + raise NotImplementedError("distance argument not currently implemented") + np.random.seed(seed) + + index = data.index if isinstance(data, pd.DataFrame) else None + + data = utils.toarray(data) + if data.shape[1] > 3: + warnings.warn("Expected data to be low-dimensional. " + "Got data.shape[1] = {}".format(data.shape[1]), + UserWarning) + cluster_labels = utils.toarray(cluster_labels).flatten() + if not cluster_labels.shape[0] == data.shape[0]: + raise ValueError("Expected len(cluster_labels) ({}) to equal " + "data.shape[0] ({})".format(cluster_labels.shape[0], data.shape[0])) + + kwargs = {} + if start_cluster is not None: + kwargs['start_cluster'] = start_cluster + if end_cluster is not None: + kwargs['end_cluster'] = end_cluster + if omega is not None: + kwargs['omega'] = omega + + slingshot = _Slingshot( + data=data, cluster_labels=cluster_labels, + shrink = shrink, + extend = extend, reweight = reweight, reassign = reassign, thresh = thresh, + max_iter = max_iter, stretch = stretch, + smoother = smoother, shrink_method = shrink_method, + allow_breaks = allow_breaks, **kwargs, + seed=seed, rpy_verbose=verbose) + slingshot['curves'] = np.array(list(slingshot['curves'].values())) + + membership = (~np.isnan(slingshot['pseudotime'])).astype(int) + branch = np.sum(membership * (2**np.arange(membership.shape[1])), axis=1) + # reorder based on pseudotime + branch_ids = np.unique(branch) + branch_means = [np.nanmean(slingshot['pseudotime'][branch==id]) + if not np.all(np.isnan(slingshot['pseudotime'][branch==id])) else np.nan + for id in branch_ids] + branch_order = np.argsort(branch_means) + branch_old = branch.copy() + for i in range(len(branch_order)): + j = branch_order[i] + if np.isnan(branch_means[j]): + branch[branch_old == branch_ids[j]] = -1 + else: + branch[branch_old == branch_ids[j]] = i + slingshot['branch'] = branch + + if index is not None: + slingshot['pseudotime'] = pd.DataFrame(slingshot['pseudotime'], index=index) + slingshot['branch'] = pd.Series(slingshot['branch'], name='branch', index=index) + return slingshot diff --git a/scprep/run/splatter.py b/scprep/run/splatter.py index 2bcaa187..4bbc18cb 100644 --- a/scprep/run/splatter.py +++ b/scprep/run/splatter.py @@ -1,8 +1,32 @@ import numpy as np -from .r_function import RFunction +from . import r_function -_SplatSimulate = RFunction( + +def install(site_repository = None, update = False, version = None, verbose = True): + """Install the required R packages to run Splatter + + Parameters + ---------- + site_repository : string, optional (default: None) + additional repository in which to look for packages to install. + This repository will be prepended to the default repositories + update : boolean, optional (default: False) + When False, don't attempt to update old packages. + When True, update old packages automatically. + version : string, optional (default: None) + Bioconductor version to install, e.g., version = "3.8". + The special symbol version = "devel" installs the current 'development' version. + If None, installs from the current version. + verbose : boolean, optional (default: True) + Install script verbosity. 
+ """ + r_function.install_bioconductor( + 'splatter', site_repository=site_repository, + update=update, version=version, verbose=verbose) + + +_SplatSimulate = r_function.RFunction( setup=""" library(splatter) """, diff --git a/scprep/select.py b/scprep/select.py index 6d055be5..6d22c699 100644 --- a/scprep/select.py +++ b/scprep/select.py @@ -5,6 +5,7 @@ import warnings import re import sys + from . import utils if int(sys.version.split(".")[1]) < 7: @@ -57,7 +58,7 @@ def _check_columns_compatible(*data): raise ValueError( "Expected `data` and `extra_data` pandas inputs to have " "the same column names. Fix with " - "`scprep.select.select_cols(*extra_data, data.columns)`") + "`scprep.select.select_cols(*extra_data, idx=data.columns)`") def _check_rows_compatible(*data): @@ -73,7 +74,7 @@ def _check_rows_compatible(*data): raise ValueError( "Expected `data` and `extra_data` pandas inputs to have " "the same index. Fix with " - "`scprep.select.select_rows(*extra_data, data.index)`") + "`scprep.select.select_rows(*extra_data, idx=data.index)`") def _convert_dataframe_1d(idx): @@ -112,7 +113,7 @@ def _exact_word_regex(word): allowed_chars = ['\\(', '\\)', '\\[', '\\]', '\\.', ',', '!', '\\?', ' ', '^', '$'] wildcard = "(" + "|".join(allowed_chars) + ")+" - return "{wildcard}{word}{wildcard}".format(wildcard=wildcard, word=word) + return "{wildcard}{word}{wildcard}".format(wildcard=wildcard, word=re.escape(word)) def _get_string_subset_mask(data, starts_with=None, ends_with=None, @@ -218,7 +219,7 @@ def get_gene_set(data, starts_with=None, ends_with=None, """ if not _is_1d(data): try: - data = data.columns.values + data = data.columns.to_numpy() except AttributeError: raise TypeError("data must be a list of gene names or a pandas " "DataFrame. Got {}".format(type(data).__name__)) @@ -255,7 +256,7 @@ def get_cell_set(data, starts_with=None, ends_with=None, """ if not _is_1d(data): try: - data = data.index.values + data = data.index.to_numpy() except AttributeError: raise TypeError("data must be a list of cell names or a pandas " "DataFrame. 
Got {}".format(type(data).__name__)) @@ -329,21 +330,37 @@ def select_cols(data, *extra_data, idx=None, _check_idx_1d(idx) idx = idx.flatten() + if isinstance(data, pd.SparseDataFrame): + # evil deprecated dataframe; get rid of it + data = utils.SparseDataFrame(data) if isinstance(data, pd.DataFrame): try: - data = data.loc[:, idx] + if isinstance(idx, (numbers.Integral, str)): + data = data.loc[:, idx] + else: + if np.issubdtype(idx.dtype, np.dtype(bool).type): + # temporary workaround for pandas error + raise TypeError + data = data.loc[:, idx] except (KeyError, TypeError): + if isinstance(idx, str): + raise if isinstance(idx, numbers.Integral) or \ - issubclass(np.array(idx).dtype.type, numbers.Integral): + np.issubdtype(idx.dtype, np.dtype(int)) or \ + np.issubdtype(idx.dtype, np.dtype(bool)): data = data.loc[:, np.array(data.columns)[idx]] else: raise elif isinstance(data, pd.Series): try: + if np.issubdtype(idx.dtype, np.dtype(bool).type): + # temporary workaround for pandas error + raise TypeError data = data.loc[idx] except (KeyError, TypeError): if isinstance(idx, numbers.Integral) or \ - issubclass(np.array(idx).dtype.type, numbers.Integral): + np.issubdtype(idx.dtype, np.dtype(int)) or \ + np.issubdtype(idx.dtype, np.dtype(bool)): data = data.loc[np.array(data.index)[idx]] else: raise @@ -432,16 +449,28 @@ def select_rows(data, *extra_data, idx=None, _check_idx_1d(idx) idx = idx.flatten() + if isinstance(data, pd.SparseDataFrame): + # evil deprecated dataframe; get rid of it + data = utils.SparseDataFrame(data) if isinstance(data, (pd.DataFrame, pd.Series)): try: - with warnings.catch_warnings(): - warnings.filterwarnings( - "error", "Passing list-likes to .loc") + if isinstance(idx, (numbers.Integral, str)): data = data.loc[idx] + else: + if np.issubdtype(idx.dtype, np.dtype(bool).type): + # temporary workaround for pandas error + raise TypeError + with warnings.catch_warnings(): + warnings.filterwarnings( + "error", "Passing list-likes to .loc") + data = data.loc[idx] except (KeyError, TypeError, FutureWarning): + if isinstance(idx, str): + raise if isinstance(idx, numbers.Integral) or \ - issubclass(np.array(idx).dtype.type, numbers.Integral): - data = data.iloc[idx] + np.issubdtype(idx.dtype, np.dtype(int)) or \ + np.issubdtype(idx.dtype, np.dtype(bool)): + data = data.loc[np.array(data.index)[idx]] else: raise elif _is_1d(data): @@ -494,6 +523,50 @@ def subsample(*data, n=10000, seed=None): if N < n: raise ValueError("Expected n ({}) <= n_samples ({})".format(n, N)) np.random.seed(seed) - select_idx = np.random.choice(N, n, replace=False) + select_idx = np.isin(np.arange(N), np.random.choice(N, n, replace=False)) data = [select_rows(d, idx=select_idx) for d in data] return tuple(data) if len(data) > 1 else data[0] + + +def highly_variable_genes(data, *extra_data, span=0.7, interpolate=0.2, kernel_size=0.05, + cutoff=None, percentile=80): + """Select genes with high variability + + Variability is computed as the deviation from a loess fit + to the rolling median of the mean-variance curve + + Parameters + ---------- + data : array-like, shape=[n_samples, n_features] + Input data + extra_data : array-like, shape=[any, n_features], optional + Optional additional data objects from which to select the same rows + span : float, optional (default: 0.7) + Fraction of genes to use when computing the loess estimate at each point + interpolate : float, optional (default: 0.2) + Multiple of the standard deviation of variances at which to interpolate + linearly in order to reduce 
computation time.
+    kernel_size : float or int, optional (default: 0.05)
+        Width of rolling median window. If a float, the width is given by
+        kernel_size * data.shape[1]
+    cutoff : float, optional (default: None)
+        Variability above which expression is deemed significant
+    percentile : int, optional (default: 80)
+        Percentile of variability above which genes are kept.
+        Must be an integer between 0 and 100. Only one of `cutoff`
+        and `percentile` should be specified.
+
+    Returns
+    -------
+    data : array-like, shape=[n_samples, m_features]
+        Filtered output data, where m_features <= n_features
+    extra_data : array-like, shape=[any, m_features]
+        Filtered extra data, if passed.
+    """
+    from . import measure
+    var_genes = measure.gene_variability(data, span=span, interpolate=interpolate,
+                                         kernel_size=kernel_size)
+    keep_cells_idx = utils._get_filter_idx(var_genes,
+                                           cutoff, percentile,
+                                           keep_cells='above')
+    return select_cols(data, *extra_data, idx=keep_cells_idx)
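A sketch of the new gene-selection helper in use, assuming `statsmodels` is available for the loess fit (toy count matrix; the shapes are illustrative):

```python
import numpy as np
import scprep

# toy count matrix: 500 cells x 1000 genes
data = np.random.negative_binomial(1, 0.5, size=(500, 1000)).astype(float)

# keep genes whose variability is above the 80th percentile,
# i.e. roughly the top 20% most variable genes
data_hv = scprep.select.highly_variable_genes(data, percentile=80)
print(data_hv.shape)  # approximately (500, 200)
```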
" + "Got {}".format(measure)) + if not (len(X.shape) == 2 and len(Y.shape) == 2): + raise ValueError("Expected `X` and `Y` to be matrices. " + "Got shapes {}, {}".format(X.shape, Y.shape)) + [X, Y] = utils.check_consistent_columns([X, Y]) + if gene_names is not None: + if isinstance(X, pd.DataFrame): + X = select.select_cols(X, idx=gene_names) + gene_names = X.columns + if isinstance(Y, pd.DataFrame): + Y = select.select_cols(Y, idx=gene_names) + gene_names = Y.columns + if not len(gene_names) == X.shape[1]: + raise ValueError("Expected gene_names to have length {}. " + "Got {}".format(X.shape[1], len(gene_names))) + else: + if isinstance(X, pd.DataFrame) and isinstance(Y, pd.DataFrame): + gene_names = X.columns + else: + gene_names = np.arange(X.shape[1]) + X = utils.to_array_or_spmatrix(X) + Y = utils.to_array_or_spmatrix(Y) + # inconsistent behaviour from csr and csc + if sparse.issparse(X): + X = X.tocsr() + if sparse.issparse(Y): + Y = Y.tocsr() + if measure == 'difference': + difference = mean_difference(X, Y) + elif measure == 'emd': + difference = joblib.Parallel(n_jobs)(joblib.delayed(EMD)( + select.select_cols(X, idx=i), + select.select_cols(Y, idx=i)) + for i in range(X.shape[1])) + difference = np.array(difference) * np.sign(mean_difference(X, Y)) + result = pd.DataFrame({'gene' : gene_names, measure : difference}) + if direction == 'up': + result = result.sort_values([measure, 'gene'], ascending=False) + elif direction == 'down': + result = result.sort_values([measure, 'gene'], ascending=True) + elif direction == 'both': + result['measure_abs'] = np.abs(difference) + result = result.sort_values(['measure_abs', 'gene'], ascending=False) + del result['measure_abs'] + result.index = np.arange(result.shape[0]) + return result + + +def differential_expression_by_cluster(data, clusters, + measure='difference', + direction='up', + gene_names=None, + n_jobs=-2): + """Calculate the most significant genes for each cluster in a dataset + + Measurements are run for each cluster against the rest of the dataset. + + Parameters + ---------- + data : array-like, shape=[n_cells, n_genes] + clusters : list-like, shape=[n_cells] + measure : {'difference', 'emd'}, optional (default: 'difference') + The measurement to be used to rank genes + direction : {'up', 'down', 'both'}, optional (default: 'up') + The direction in which to consider genes significant. If 'up', rank genes where X > Y. If 'down', rank genes where X < Y. If 'both', rank genes by absolute value. + gene_names : list-like or `None`, optional (default: `None`) + List of gene names associated with the columns of X and Y + n_jobs : int, optional (default: -2) + Number of threads to use if the measurement is parallelizable (currently used for EMD). If negative, -1 refers to all available cores. + + Returns + ------- + result : dict(pd.DataFrame) + Dictionary containing an ordered DataFrame with a column "gene" and a column named `measure` for each cluster. + """ + if gene_names is not None and isinstance(data, pd.DataFrame): + data = select.select_cols(data, idx=gene_names) + gene_names = data.columns + if gene_names is None: + if isinstance(data, pd.DataFrame): + gene_names = data.columns + elif not len(gene_names) == data.shape[1]: + raise ValueError("Expected gene_names to have length {}. 
" + "Got {}".format(data.shape[1], len(gene_names))) + data = utils.to_array_or_spmatrix(data) + result = {cluster : differential_expression( + select.select_rows(data, idx=clusters==cluster), + select.select_rows(data, idx=clusters!=cluster), + measure = measure, direction = direction, + gene_names = gene_names, n_jobs = n_jobs) + for cluster in np.unique(clusters)} + return result + def _vector_coerce_dense(x): x = utils.toarray(x) x_1d = x.flatten() @@ -381,5 +531,5 @@ def _vector_coerce_two_dense(x, y): raise ValueError("Expected x and y to be 1D arrays. " "Got shapes x {}, y {}".format(x.shape, y.shape)) else: - raise + raise e return x, y diff --git a/scprep/transform.py b/scprep/transform.py index 0a55b39c..179fd026 100644 --- a/scprep/transform.py +++ b/scprep/transform.py @@ -60,7 +60,9 @@ def log(data, pseudocount=1, base=10): "Got pseudocount = {}".format(utils.matrix_min(data), pseudocount)) elif pseudocount != data_min + 1 and \ - (sparse.issparse(data) or isinstance(data, pd.SparseDataFrame)): + (sparse.issparse(data) or + isinstance(data, pd.SparseDataFrame) or + utils.is_sparse_dataframe(data)): req = "min(data) + 1 ({})".format(data_min + 1) if data_min != 0 else "1" warnings.warn("log transform on sparse data requires " diff --git a/scprep/utils.py b/scprep/utils.py index 6c57022d..de2172e4 100644 --- a/scprep/utils.py +++ b/scprep/utils.py @@ -63,7 +63,7 @@ def check_version(pkg, min_version=None): "Please install it with e.g. `pip install --user {0}`".format(pkg)) if not _version_check(module.__version__, min_version): raise ImportError( - "scprep requires {0}>={1} (installed: {2}). " + "{0}>={1} is required (installed: {2}). " "Please upgrade it with e.g." " `pip install --user --upgrade {0}`".format( pkg, min_version, module.__version__)) @@ -78,6 +78,63 @@ def _with_pkg(fun, pkg=None, min_version=None, *args, **kwargs): return fun(*args, **kwargs) +def _get_percentile_cutoff(data, cutoff=None, percentile=None, required=False): + """Get a cutoff for a dataset + + Parameters + ---------- + data : array-like + cutoff : float or None, optional (default: None) + Absolute cutoff value. Only one of cutoff and percentile may be given + percentile : float or None, optional (default: None) + Percentile cutoff value between 0 and 100. + Only one of cutoff and percentile may be given + required : bool, optional (default: False) + If True, one of cutoff and percentile must be given. + + Returns + ------- + cutoff : float or None + Absolute cutoff value. Can only be None if required is False and + cutoff and percentile are both None. + """ + if percentile is not None: + if cutoff is not None: + raise ValueError( + "Only one of `cutoff` and `percentile` should be given." + "Got cutoff={}, percentile={}".format(cutoff, percentile)) + if not isinstance(percentile, numbers.Number): + return [_get_percentile_cutoff(data, percentile=p) + for p in percentile] + if percentile < 1: + warnings.warn( + "`percentile` expects values between 0 and 100." + "Got {}. 
Did you mean {}?".format(percentile, + percentile * 100), + UserWarning) + cutoff = np.percentile(np.array(data).reshape(-1), percentile) + elif cutoff is None and required: + raise ValueError( + "One of either `cutoff` or `percentile` must be given.") + return cutoff + + + +def _get_filter_idx(values, + cutoff, percentile, + keep_cells): + cutoff = _get_percentile_cutoff( + values, cutoff, percentile, required=True) + if keep_cells == 'above': + keep_cells_idx = values > cutoff + elif keep_cells == 'below': + keep_cells_idx = values < cutoff + else: + raise ValueError("Expected `keep_cells` in ['above', 'below']. " + "Got {}".format(keep_cells)) + return keep_cells_idx + + def toarray(x): """Convert an array-like to a np.ndarray @@ -93,13 +150,13 @@ def toarray(x): if isinstance(x, pd.SparseDataFrame): x = x.to_coo().toarray() elif isinstance(x, pd.SparseSeries): - x = x.to_dense().values + x = x.to_dense().to_numpy() elif isinstance(x, (pd.DataFrame, pd.Series, pd.Index)): - x = x.values + x = x.to_numpy() elif isinstance(x, sparse.spmatrix): x = x.toarray() elif isinstance(x, np.matrix): - x = np.array(x) + x = x.A elif isinstance(x, list): x_out = [] for xi in x: @@ -137,7 +194,10 @@ def to_array_or_spmatrix(x): """ if isinstance(x, pd.SparseDataFrame): x = x.to_coo() - elif isinstance(x, sparse.spmatrix): + elif is_sparse_dataframe(x) or is_sparse_series(x): + x = x.sparse.to_coo() + elif isinstance(x, (sparse.spmatrix, np.ndarray, numbers.Number)) and \ + not isinstance(x, np.matrix): pass elif isinstance(x, list): x_out = [] @@ -154,6 +214,44 @@ def to_array_or_spmatrix(x): return x +def is_sparse_dataframe(x): + if isinstance(x, pd.DataFrame) and not isinstance(x, pd.SparseDataFrame): + try: + x.sparse + return True + except AttributeError: + pass + return False + + +def is_sparse_series(x): + if isinstance(x, pd.Series) and not isinstance(x, pd.SparseSeries): + try: + x.sparse + return True + except AttributeError: + pass + return False + + +def dataframe_to_sparse(x, fill_value=0.0): + return x.astype(pd.SparseDtype(float, fill_value=fill_value)) + + +def SparseDataFrame(X, columns=None, index=None, default_fill_value=0.0): + if sparse.issparse(X): + X = pd.DataFrame.sparse.from_spmatrix(X) + X.sparse.fill_value = default_fill_value + elif isinstance(X, pd.SparseDataFrame) or not isinstance(X, pd.DataFrame): + X = pd.DataFrame(X) + X = dataframe_to_sparse(X, fill_value=default_fill_value) + if columns is not None: + X.columns = columns + if index is not None: + X.index = index + return X + + def matrix_transform(data, fun, *args, **kwargs): """Perform a numerical transformation to data @@ -171,7 +269,7 @@ def matrix_transform(data, fun, *args, **kwargs): data : array-like, shape=[n_samples, n_features] Transformed output data """ - if isinstance(data, pd.SparseDataFrame): + if is_sparse_dataframe(data) or isinstance(data, pd.SparseDataFrame): data = data.copy() for col in data.columns: data[col] = fun(data[col], *args, **kwargs) @@ -213,8 +311,15 @@ def matrix_sum(data, axis=None): index = data.index if axis == 1 else data.columns sums = pd.Series(np.array(data.to_coo().sum(axis)).flatten(), index=index) + elif is_sparse_dataframe(data): + if axis is None: + sums = data.sparse.to_coo().sum() + else: + index = data.index if axis == 1 else data.columns + sums = pd.Series(np.array(data.sparse.to_coo().sum(axis)).flatten(), + index=index) elif axis is None: - sums = data.values.sum() + sums = data.to_numpy().sum() else: sums = data.sum(axis) else: @@ -224,6 +329,65 @@ def 
matrix_sum(data, axis=None): return sums +def matrix_std(data, axis=None): + """Get the column-wise, row-wise, or total standard deviation of a matrix + + Parameters + ---------- + data : array-like, shape=[n_samples, n_features] + Input data + axis : int or None, optional (default: None) + Axis across which to calculate standard deviation. + axis=0 gives column standard deviation, + axis=1 gives row standard deviation. + None gives the total standard deviation. + + Returns + ------- + std : array-like or float + Standard deviation along desired axis. + """ + if axis not in [0, 1, None]: + raise ValueError("Expected axis in [0, 1, None]. Got {}".format(axis)) + index = None + if isinstance(data, pd.DataFrame) and axis is not None: + if axis == 1: + index = data.index + elif axis == 0: + index = data.columns + data = to_array_or_spmatrix(data) + if sparse.issparse(data): + if axis is None: + if isinstance(data, (sparse.lil_matrix, sparse.dok_matrix)): + data = data.tocoo() + data_sq = data.copy() + data_sq.data = data_sq.data ** 2 + variance = data_sq.mean() - data.mean() ** 2 + std = np.sqrt(variance) + else: + if axis == 0: + data = data.tocsc() + next_fn = data.getcol + N = data.shape[1] + elif axis == 1: + data = data.tocsr() + next_fn = data.getrow + N = data.shape[0] + std = [] + for i in range(N): + col = next_fn(i) + col_sq = col.copy() + col_sq.data = col_sq.data ** 2 + variance = col_sq.mean() - col.mean() ** 2 + std.append(np.sqrt(variance)) + std = np.array(std) + else: + std = np.std(data, axis=axis) + if index is not None: + std = pd.Series(std, index=index, name='std') + return std + + def matrix_vector_elementwise_multiply(data, multiplier, axis=None): """Elementwise multiply a matrix by a vector @@ -276,14 +440,18 @@ def matrix_vector_elementwise_multiply(data, multiplier, axis=None): data.shape[1], multiplier.shape)) multiplier = multiplier.reshape(1, -1) - if isinstance(data, pd.SparseDataFrame): + if isinstance(data, pd.SparseDataFrame) or is_sparse_dataframe(data): data = data.copy() multiplier = multiplier.flatten() if axis == 0: - data = data.T - for col, mult in zip(data.columns, multiplier): - data[col] = data[col] * mult - data = data.T + for col in data.columns: + try: + mult_indices = data[col].values.sp_index.indices + except AttributeError: + mult_indices = data[col].values.sp_index.to_int_index().indices + new_data = data[col].values.sp_values * multiplier[mult_indices] + data[col].values.sp_values.put(np.arange(data[col].sparse.npoints), + new_data) else: for col, mult in zip(data.columns, multiplier): data[col] = data[col] * mult @@ -364,6 +532,48 @@ def matrix_any(condition): return np.sum(np.sum(condition)) > 0 +def check_consistent_columns(data): + """Ensure that a set of data matrices have consistent columns + + Parameters + ---------- + data : list of array-likes + List of matrices to be checked + + Returns + ------- + data : list of array-likes + List of matrices with consistent columns, subsetted if necessary + + Raises + ------ + ValueError + Raised if data has inconsistent number of columns and does not + have column names for subsetting + """ + matrix_type = type(data[0]) + matrix_shape = data[0].shape[1] + if issubclass(matrix_type, pd.DataFrame): + if not (np.all([d.shape[1] == matrix_shape for d in data[1:]]) and + np.all([data[0].columns == d.columns for d in data])): + common_genes = data[0].columns.values + for d in data[1:]: + common_genes = common_genes[np.isin(common_genes, + d.columns.values)] + for i in range(len(data)): + data[i] = 
data[i][common_genes] + warnings.warn("Input data has inconsistent column names. " + "Subsetting to {} common columns.".format( + len(common_genes)), UserWarning) + else: + for d in data[1:]: + if not d.shape[1] == matrix_shape: + shapes = ", ".join([str(d.shape[1]) for d in data]) + raise ValueError("Expected data all with the same number of " + "columns. Got {}".format(shapes)) + return data + + def combine_batches(data, batch_labels, append_to_cell_names=None): """Combine data matrices from multiple batches and store a batch label @@ -393,6 +603,8 @@ def combine_batches(data, batch_labels, append_to_cell_names=None): # check consistent type matrix_type = type(data[0]) + if matrix_type is pd.SparseDataFrame: + matrix_type = pd.DataFrame if not issubclass(matrix_type, (np.ndarray, pd.DataFrame, sparse.spmatrix)): @@ -405,26 +617,7 @@ def combine_batches(data, batch_labels, append_to_cell_names=None): raise TypeError("Expected data all of the same class. " "Got {}".format(types)) - # check consistent columns - matrix_shape = data[0].shape[1] - if issubclass(matrix_type, pd.DataFrame): - if not (np.all([d.shape[1] == matrix_shape for d in data[1:]]) and - np.all([data[0].columns == d.columns for d in data])): - common_genes = data[0].columns.values - for d in data[1:]: - common_genes = common_genes[np.isin(common_genes, - d.columns.values)] - for i in range(len(data)): - data[i] = data[i][common_genes] - warnings.warn("Input data has inconsistent column names. " - "Subsetting to {} common columns.".format( - len(common_genes)), UserWarning) - else: - for d in data[1:]: - if not d.shape[1] == matrix_shape: - shapes = ", ".join([str(d.shape[1]) for d in data]) - raise ValueError("Expected data all with the same number of " - "columns. Got {}".format(shapes)) + data = check_consistent_columns(data) # check append_to_cell_names if append_to_cell_names and not issubclass(matrix_type, pd.DataFrame): @@ -432,7 +625,11 @@ def combine_batches(data, batch_labels, append_to_cell_names=None): " Got {}".format(matrix_type.__name__), UserWarning) elif append_to_cell_names is None: if issubclass(matrix_type, pd.DataFrame): - append_to_cell_names = True + if all([isinstance(d.index, pd.RangeIndex) for d in data]): + # rangeindex should still be a rangeindex + append_to_cell_names = False + else: + append_to_cell_names = True else: append_to_cell_names = False @@ -449,6 +646,11 @@ def combine_batches(data, batch_labels, append_to_cell_names=None): "_" + str(batch_labels[i])) for i, d in enumerate(data)]) data_combined.index = index + elif all([isinstance(d.index, pd.RangeIndex) for d in data]): + # rangeindex should still be a rangeindex + data_combined = data_combined.reset_index(drop=True) + sample_labels = pd.Series(sample_labels, index=data_combined.index, + name='sample_labels') elif issubclass(matrix_type, sparse.spmatrix): data_combined = sparse.vstack(data) elif issubclass(matrix_type, np.ndarray): @@ -458,37 +660,25 @@ def combine_batches(data, batch_labels, append_to_cell_names=None): def select_cols(data, idx): - warnings.warn("`scprep.utils.select_cols` is deprecated. Use " - "`scprep.select.select_cols` instead.", - FutureWarning) - return select.select_cols(data, idx=idx) + raise RuntimeError("`scprep.utils.select_cols` is deprecated. Use " + "`scprep.select.select_cols` instead.") def select_rows(data, idx): - warnings.warn("`scprep.utils.select_rows` is deprecated. 
Use " - "`scprep.select.select_rows` instead.", - FutureWarning) - return select.select_rows(data, idx=idx) + raise RuntimeError("`scprep.utils.select_rows` is deprecated. Use " + "`scprep.select.select_rows` instead.") def get_gene_set(data, starts_with=None, ends_with=None, regex=None): - warnings.warn("`scprep.utils.get_gene_set` is deprecated. Use " - "`scprep.select.get_gene_set` instead.", - FutureWarning) - return select.get_gene_set(data, starts_with=starts_with, - ends_with=ends_with, regex=regex) + raise RuntimeError("`scprep.utils.get_gene_set` is deprecated. Use " + "`scprep.select.get_gene_set` instead.") def get_cell_set(data, starts_with=None, ends_with=None, regex=None): - warnings.warn("`scprep.utils.get_cell_set` is deprecated. Use " - "`scprep.select.get_cell_set` instead.", - FutureWarning) - return select.get_cell_set(data, starts_with=starts_with, - ends_with=ends_with, regex=regex) + raise RuntimeError("`scprep.utils.get_cell_set` is deprecated. Use " + "`scprep.select.get_cell_set` instead.") def subsample(*data, n=10000, seed=None): - warnings.warn("`scprep.utils.subsample` is deprecated. Use " - "`scprep.select.subsample` instead.", - FutureWarning) - return select.subsample(*data, n=n, seed=seed) + raise RuntimeError("`scprep.utils.subsample` is deprecated. Use " + "`scprep.select.subsample` instead.") diff --git a/scprep/version.py b/scprep/version.py index 54306210..1b0953c5 100644 --- a/scprep/version.py +++ b/scprep/version.py @@ -1,4 +1,4 @@ # author: Scott Gigante # (C) 2018 Krishnaswamy Lab GPLv2 -__version__ = "0.12.2" +__version__ = "1.0.0" diff --git a/setup.py b/setup.py index 3871b98b..0c49f2df 100644 --- a/setup.py +++ b/setup.py @@ -3,11 +3,11 @@ from setuptools import setup, find_packages install_requires = [ - 'numpy>=1.10.0', - 'scipy>=0.18.0', + 'numpy>=1.12.0', + 'scipy>=0.18.1', 'scikit-learn>=0.19.1', - 'pandas>=0.19.0,<0.24', - 'decorator>=4.3.0' + 'decorator>=4.3.0', + 'pandas>=0.25', ] test_requires = [ @@ -16,9 +16,10 @@ 'fcsparser', 'tables', 'h5py', - 'rpy2>=3.0', 'coverage', - 'coveralls' + 'coveralls', + 'parameterized', + 'statsmodels', ] doc_requires = [ @@ -32,9 +33,9 @@ if sys.version_info[:2] < (3, 5): raise RuntimeError("Python version >=3.5 required.") elif sys.version_info[:2] < (3, 6): - test_requires += ['matplotlib>=3.0,<3.1'] + test_requires += ['matplotlib>=3.0,<3.1', 'rpy2>=3.0,<3.1'] else: - test_requires += ['matplotlib>=3.0'] + test_requires += ['matplotlib>=3.0', 'rpy2>=3.0'] version_py = os.path.join(os.path.dirname( __file__), 'scprep', 'version.py') @@ -46,7 +47,7 @@ setup(name='scprep', version=version, description='scprep', - author='Jay Stanley, Scott Gigante, and Daniel Burkhardt, Krishnaswamy Lab, Yale University', + author='Scott Gigante, Daniel Burkhardt and Jay Stanley, Yale University', author_email='krishnaswamylab@gmail.com', packages=find_packages(), license='GNU General Public License Version 2', diff --git a/test.png b/test.png new file mode 100644 index 00000000..01c74e2d Binary files /dev/null and b/test.png differ diff --git a/test/test_filter.py b/test/test_filter.py index 24e30c97..d55b3489 100644 --- a/test/test_filter.py +++ b/test/test_filter.py @@ -85,6 +85,7 @@ def test_filter_rare_genes(self): self.X_dense, utils.assert_transform_equals, Y=X_filtered, transform=scprep.filter.filter_rare_genes) + def test_library_size_filter(self): X_filtered = scprep.filter.filter_library_size( self.X_sparse, cutoff=100) @@ -209,7 +210,7 @@ def test_gene_expression_filter_warning(self): self.X_sparse, 
genes=genes, cutoff=None, percentile=None) assert_raise_message( KeyError, - "the label [not_a_gene] is not in the [columns]", + "not_a_gene", scprep.filter.filter_gene_set_expression, self.X_sparse, genes=no_genes, percentile=90.0, keep_cells='below') @@ -266,7 +267,13 @@ def test_deprecated_sample_labels(self): def test_large_sparse_dataframe_library_size(): + matrix._ignore_pandas_sparse_warning() X = pd.SparseDataFrame(sparse.coo_matrix((10**7, 2 * 10**4)), default_fill_value=0.0) cell_sums = scprep.measure.library_size(X) assert cell_sums.shape[0] == X.shape[0] + matrix._reset_warnings() + X = matrix.SparseDataFrame(sparse.coo_matrix((10**7, 2 * 10**4)), + default_fill_value=0.0) + cell_sums = scprep.measure.library_size(X) + assert cell_sums.shape[0] == X.shape[0] diff --git a/test/test_hdf5.py b/test/test_hdf5.py index 26831f3c..78c3f319 100644 --- a/test/test_hdf5.py +++ b/test/test_hdf5.py @@ -14,7 +14,7 @@ def test_failed_import_tables(): tables = scprep.io.hdf5.tables del scprep.io.hdf5.tables assert hdf5_available() is True - with tables.File(h5_file) as f: + with tables.File(h5_file, 'r') as f: assert scprep.io.hdf5._is_tables(f) is False with scprep.io.hdf5.open_file(h5_file) as f: assert scprep.io.hdf5._is_h5py(f) @@ -26,7 +26,7 @@ def test_failed_import_h5py(): h5py = scprep.io.hdf5.h5py del scprep.io.hdf5.h5py assert hdf5_available() is True - with h5py.File(h5_file) as f: + with h5py.File(h5_file, 'r') as f: assert scprep.io.hdf5._is_h5py(f) is False scprep.io.hdf5.h5py = h5py diff --git a/test/test_io.py b/test/test_io.py index 401188e9..a3218a79 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -1,20 +1,120 @@ -from tools import data +from tools import data, utils import scprep import scprep.io.utils -from sklearn.utils.testing import assert_warns_message, assert_raise_message +from sklearn.utils.testing import assert_warns_message, assert_raise_message, assert_raises import pandas as pd import numpy as np +from scipy import sparse import os +import shutil import fcsparser import zipfile import urllib +import unittest + + +class TestMatrixToDataFrame(unittest.TestCase): + + @classmethod + def setUpClass(self): + self.X_dense = data.load_10X(sparse=False) + self.X_sparse = data.load_10X(sparse=True) + self.X_numpy = self.X_dense.to_numpy() + self.X_coo = self.X_sparse.sparse.to_coo() + self.cell_names = self.X_dense.index + self.gene_names = self.X_dense.columns + + def test_matrix_to_dataframe_no_names_sparse(self): + Y = scprep.io.utils._matrix_to_data_frame(self.X_numpy, sparse=True) + assert isinstance(Y, sparse.csr_matrix) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + Y = scprep.io.utils._matrix_to_data_frame(self.X_coo, sparse=True) + assert isinstance(Y, sparse.spmatrix) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + + def test_matrix_to_dataframe_no_names_dataframe_sparse(self): + Y = scprep.io.utils._matrix_to_data_frame(self.X_dense, sparse=True) + assert scprep.utils.is_sparse_dataframe(Y) + assert not isinstance(Y, pd.SparseDataFrame) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + utils.assert_matrix_class_equivalent(Y, self.X_sparse) + Y = scprep.io.utils._matrix_to_data_frame(self.X_sparse, sparse=True) + assert scprep.utils.is_sparse_dataframe(Y) + assert not isinstance(Y, pd.SparseDataFrame) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + utils.assert_matrix_class_equivalent(Y, self.X_sparse) + + def test_matrix_to_dataframe_no_names_dense(self): + Y = scprep.io.utils._matrix_to_data_frame(self.X_numpy, 
sparse=False) + assert isinstance(Y, np.ndarray) + assert np.all(Y == self.X_numpy) + Y = scprep.io.utils._matrix_to_data_frame(self.X_coo, sparse=False) + assert isinstance(Y, np.ndarray) + assert np.all(Y == self.X_numpy) + + def test_matrix_to_dataframe_no_names_dataframe_dense(self): + Y = scprep.io.utils._matrix_to_data_frame(self.X_dense, sparse=False) + assert isinstance(Y, pd.DataFrame) + assert not scprep.utils.is_sparse_dataframe(Y) + assert not isinstance(Y, pd.SparseDataFrame) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + utils.assert_matrix_class_equivalent(Y, self.X_dense) + Y = scprep.io.utils._matrix_to_data_frame(self.X_sparse, sparse=False) + assert isinstance(Y, pd.DataFrame) + assert not scprep.utils.is_sparse_dataframe(Y) + assert not isinstance(Y, pd.SparseDataFrame) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + utils.assert_matrix_class_equivalent(Y, self.X_dense) + + def test_matrix_to_dataframe_names_sparse(self): + Y = scprep.io.utils._matrix_to_data_frame(self.X_dense, cell_names=self.cell_names, + gene_names=self.gene_names, sparse=True) + assert scprep.utils.is_sparse_dataframe(Y) + assert not isinstance(Y, pd.SparseDataFrame) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + utils.assert_matrix_class_equivalent(Y, self.X_sparse) + Y = scprep.io.utils._matrix_to_data_frame(self.X_sparse, cell_names=self.cell_names, + gene_names=self.gene_names, sparse=True) + assert scprep.utils.is_sparse_dataframe(Y) + assert not isinstance(Y, pd.SparseDataFrame) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + utils.assert_matrix_class_equivalent(Y, self.X_sparse) + Y = scprep.io.utils._matrix_to_data_frame(self.X_numpy, cell_names=self.cell_names, + gene_names=self.gene_names, sparse=True) + assert scprep.utils.is_sparse_dataframe(Y) + assert not isinstance(Y, pd.SparseDataFrame) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + utils.assert_matrix_class_equivalent(Y, self.X_sparse) + + def test_matrix_to_dataframe_names_dense(self): + Y = scprep.io.utils._matrix_to_data_frame(self.X_dense, cell_names=self.cell_names, + gene_names=self.gene_names, sparse=False) + assert isinstance(Y, pd.DataFrame) + assert not scprep.utils.is_sparse_dataframe(Y) + assert not isinstance(Y, pd.SparseDataFrame) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + utils.assert_matrix_class_equivalent(Y, self.X_dense) + Y = scprep.io.utils._matrix_to_data_frame(self.X_sparse, cell_names=self.cell_names, + gene_names=self.gene_names, sparse=False) + assert isinstance(Y, pd.DataFrame) + assert not scprep.utils.is_sparse_dataframe(Y) + assert not isinstance(Y, pd.SparseDataFrame) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + utils.assert_matrix_class_equivalent(Y, self.X_dense) + Y = scprep.io.utils._matrix_to_data_frame(self.X_numpy, cell_names=self.cell_names, + gene_names=self.gene_names, sparse=False) + assert isinstance(Y, pd.DataFrame) + assert not scprep.utils.is_sparse_dataframe(Y) + assert not isinstance(Y, pd.SparseDataFrame) + assert np.all(scprep.utils.toarray(Y) == self.X_numpy) + utils.assert_matrix_class_equivalent(Y, self.X_dense) def test_10X_duplicate_gene_names(): assert_warns_message( RuntimeWarning, - "Duplicate gene names detected! Forcing `gene_labels='id'`. " - "Alternatively, try `gene_labels='both'`, `allow_duplicates=True`, or " + "Duplicate gene names detected! Forcing `gene_labels='both'`. 
" + "Alternatively, try `gene_labels='id'`, `allow_duplicates=True`, or " "load the matrix with `sparse=False`", scprep.io.load_10X, os.path.join(data.data_dir, "test_10X_duplicate_gene_names"), @@ -32,16 +132,16 @@ def test_10X_duplicate_gene_names(): def test_10X(): X = data.load_10X() assert X.shape == (100, 100) - assert isinstance(X, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X) assert X.columns[0] == "Arl8b" X = data.load_10X(gene_labels='id', sparse=False) assert X.shape == (100, 100) assert isinstance(X, pd.DataFrame) - assert not isinstance(X, pd.SparseDataFrame) + assert not scprep.utils.is_sparse_dataframe(X) assert X.columns[0] == "ENSMUSG00000030105" X = data.load_10X(gene_labels='both') assert X.shape == (100, 100) - assert isinstance(X, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X) assert X.columns[0] == "Arl8b (ENSMUSG00000030105)" X_cellranger3 = scprep.io.load_10X( os.path.join(data.data_dir, "test_10X_cellranger3"), @@ -74,7 +174,7 @@ def test_10X_zip(): filename = os.path.join(data.data_dir, "test_10X.zip") X_zip = scprep.io.load_10X_zip( filename) - assert isinstance(X_zip, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_zip) assert np.sum(np.sum(X != X_zip)) == 0 np.testing.assert_array_equal(X.columns, X_zip.columns) np.testing.assert_array_equal(X.index, X_zip.index) @@ -99,7 +199,7 @@ def test_10X_zip_url(): filename = "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X.zip" X_zip = scprep.io.load_10X_zip( filename) - assert isinstance(X_zip, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_zip) assert np.sum(np.sum(X != X_zip)) == 0 np.testing.assert_array_equal(X.columns, X_zip.columns) np.testing.assert_array_equal(X.index, X_zip.index) @@ -114,9 +214,8 @@ def test_10X_zip_url_not_a_zip(): def test_10X_zip_url_not_a_real_website(): - assert_raise_message( + assert_raises( urllib.error.URLError, - "", scprep.io.load_10X_zip, 'http://invalid.not.a.url/scprep') @@ -142,19 +241,19 @@ def test_10X_HDF5(): h5_file = os.path.join(data.data_dir, "test_10X.h5") # automatic tables backend X_hdf5 = scprep.io.load_10X_HDF5(h5_file) - assert isinstance(X_hdf5, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_hdf5) assert np.sum(np.sum(X != X_hdf5)) == 0 np.testing.assert_array_equal(X.columns, X_hdf5.columns) np.testing.assert_array_equal(X.index, X_hdf5.index) # explicit tables backend X_hdf5 = scprep.io.load_10X_HDF5(h5_file, backend='tables') - assert isinstance(X_hdf5, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_hdf5) assert np.sum(np.sum(X != X_hdf5)) == 0 np.testing.assert_array_equal(X.columns, X_hdf5.columns) np.testing.assert_array_equal(X.index, X_hdf5.index) # explicit h5py backend X_hdf5 = scprep.io.load_10X_HDF5(h5_file, backend='h5py') - assert isinstance(X_hdf5, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_hdf5) assert np.sum(np.sum(X != X_hdf5)) == 0 np.testing.assert_array_equal(X.columns, X_hdf5.columns) np.testing.assert_array_equal(X.index, X_hdf5.index) @@ -162,7 +261,7 @@ def test_10X_HDF5(): tables = scprep.io.hdf5.tables del scprep.io.hdf5.tables X_hdf5 = scprep.io.load_10X_HDF5(h5_file) - assert isinstance(X_hdf5, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_hdf5) assert np.sum(np.sum(X != X_hdf5)) == 0 np.testing.assert_array_equal(X.columns, X_hdf5.columns) np.testing.assert_array_equal(X.index, X_hdf5.index) @@ -174,19 +273,19 @@ def test_10X_HDF5_cellranger3(): h5_file = 
os.path.join(data.data_dir, "test_10X_cellranger3.h5") # automatic tables backend X_hdf5 = scprep.io.load_10X_HDF5(h5_file) - assert isinstance(X_hdf5, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_hdf5) assert np.sum(np.sum(X != X_hdf5)) == 0 np.testing.assert_array_equal(X.columns, X_hdf5.columns) np.testing.assert_array_equal(X.index, X_hdf5.index) # explicit tables backend X_hdf5 = scprep.io.load_10X_HDF5(h5_file, backend='tables') - assert isinstance(X_hdf5, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_hdf5) assert np.sum(np.sum(X != X_hdf5)) == 0 np.testing.assert_array_equal(X.columns, X_hdf5.columns) np.testing.assert_array_equal(X.index, X_hdf5.index) # explicit h5py backend X_hdf5 = scprep.io.load_10X_HDF5(h5_file, backend='h5py') - assert isinstance(X_hdf5, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_hdf5) assert np.sum(np.sum(X != X_hdf5)) == 0 np.testing.assert_array_equal(X.columns, X_hdf5.columns) np.testing.assert_array_equal(X.index, X_hdf5.index) @@ -194,7 +293,7 @@ def test_10X_HDF5_cellranger3(): tables = scprep.io.hdf5.tables del scprep.io.hdf5.tables X_hdf5 = scprep.io.load_10X_HDF5(h5_file) - assert isinstance(X_hdf5, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_hdf5) assert np.sum(np.sum(X != X_hdf5)) == 0 np.testing.assert_array_equal(X.columns, X_hdf5.columns) np.testing.assert_array_equal(X.index, X_hdf5.index) @@ -276,7 +375,7 @@ def test_csv_and_tsv(): np.testing.assert_array_equal(X_csv.columns, X_csv4.index) np.testing.assert_array_equal(X_csv.index, X_csv4.columns) assert isinstance(X_csv, pd.DataFrame) - assert not isinstance(X_csv, pd.SparseDataFrame) + assert not scprep.utils.is_sparse_dataframe(X_csv) X_csv = scprep.io.load_csv( os.path.join(data.data_dir, "test_small.csv"), gene_names=os.path.join( @@ -289,7 +388,7 @@ def test_csv_and_tsv(): np.testing.assert_array_equal(X.columns, X_csv.columns) np.testing.assert_array_equal(X.index, X_csv.index) assert isinstance(X_csv, pd.DataFrame) - assert not isinstance(X_csv, pd.SparseDataFrame) + assert not scprep.utils.is_sparse_dataframe(X_csv) X_csv = scprep.io.load_csv( os.path.join(data.data_dir, "test_small.csv"), gene_names=X.columns, @@ -300,7 +399,7 @@ def test_csv_and_tsv(): np.testing.assert_array_equal(X.columns, X_csv.columns) np.testing.assert_array_equal(X.index, X_csv.index) assert isinstance(X_csv, pd.DataFrame) - assert not isinstance(X_csv, pd.SparseDataFrame) + assert not scprep.utils.is_sparse_dataframe(X_csv) X_csv = scprep.io.load_csv( os.path.join(data.data_dir, "test_small.csv"), gene_names=None, @@ -308,8 +407,8 @@ def test_csv_and_tsv(): sparse=True, skiprows=1, usecols=range(1, 101)) - assert np.sum(np.sum(X.values != X_csv.values)) == 0 - assert isinstance(X_csv, pd.SparseDataFrame) + assert np.sum(np.sum(X.to_numpy() != X_csv.to_numpy())) == 0 + assert scprep.utils.is_sparse_dataframe(X_csv) X_csv = scprep.io.load_csv( os.path.join(data.data_dir, "test_small_duplicate_gene_names.csv")) @@ -333,26 +432,26 @@ def test_mtx(): cell_names=os.path.join( data.data_dir, "barcodes.tsv"), cell_axis="column") - assert np.sum(np.sum(X.values != X_mtx.values)) == 0 + assert np.sum(np.sum(X.to_numpy() != X_mtx.to_numpy())) == 0 np.testing.assert_array_equal(X.columns, X_mtx.columns) np.testing.assert_array_equal(X.index, X_mtx.index) - assert isinstance(X_mtx, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_mtx) X_mtx = scprep.io.load_mtx( filename, gene_names=X.columns, cell_names=X.index, 
cell_axis="column") - assert np.sum(np.sum(X.values != X_mtx.values)) == 0 + assert np.sum(np.sum(X.to_numpy() != X_mtx.to_numpy())) == 0 np.testing.assert_array_equal(X.columns, X_mtx.columns) np.testing.assert_array_equal(X.index, X_mtx.index) - assert isinstance(X_mtx, pd.SparseDataFrame) + assert scprep.utils.is_sparse_dataframe(X_mtx) X_mtx = scprep.io.load_mtx( filename, gene_names=None, cell_names=None, sparse=False, cell_axis="column") - assert np.sum(np.sum(X.values != X_mtx)) == 0 + assert np.sum(np.sum(X.to_numpy() != X_mtx)) == 0 assert isinstance(X_mtx, np.ndarray) assert_raise_message( ValueError, @@ -360,14 +459,34 @@ def test_mtx(): "Expected 'row' or 'column'", scprep.io.load_mtx, filename, cell_axis='neither') - X = scprep.io.load_mtx( + X_mtx = scprep.io.load_mtx( filename, gene_names=np.arange(X.shape[1]).astype('str'), cell_names=np.arange(X.shape[0])) - assert X.shape == (100, 100) - assert isinstance(X, pd.SparseDataFrame) - assert X.columns[0] == "0" - assert X.index[0] == 0 + assert X_mtx.shape == (100, 100) + assert scprep.utils.is_sparse_dataframe(X_mtx) + assert X_mtx.columns[0] == "0" + assert X_mtx.index[0] == 0 + + +def test_save_mtx(): + filename = os.path.join(data.data_dir, "test_10X", "matrix.mtx.gz") + X = scprep.io.load_mtx( + filename, + gene_names=os.path.join( + data.data_dir, "gene_symbols.csv"), + cell_names=os.path.join( + data.data_dir, "barcodes.tsv"), + cell_axis="column") + scprep.io.save_mtx(X, "test_mtx") + Y = scprep.io.load_mtx( + "test_mtx/matrix.mtx", + gene_names="test_mtx/gene_names.tsv", + cell_names="test_mtx/cell_names.tsv") + np.testing.assert_array_equal(X, Y) + assert np.all(X.index == Y.index) + assert np.all(X.columns == Y.columns) + shutil.rmtree("test_mtx") def test_fcs(): @@ -377,13 +496,13 @@ def test_fcs(): assert 'Time' not in X.columns assert len(set(X.columns).difference(data.columns)) == 0 np.testing.assert_array_equal(X.index, data.index) - np.testing.assert_array_equal(X.values, data[X.columns].values) + np.testing.assert_array_equal(X.to_numpy(), data[X.columns].to_numpy()) _, _, X = scprep.io.load_fcs(path, sparse=True) assert 'Time' not in X.columns assert len(set(X.columns).difference(data.columns)) == 0 np.testing.assert_array_equal(X.index, data.index) np.testing.assert_array_equal( - X.to_dense().values, data[X.columns].values) + X.sparse.to_dense().to_numpy(), data[X.columns].to_numpy()) X_meta, _, X = scprep.io.load_fcs(path, reformat_meta=False, override=True) assert set(meta.keys()) == set(X_meta.keys()) @@ -482,3 +601,62 @@ def test_parse_header(): ValueError, "Expected 50 entries in {}. 
Got 100".format(os.path.abspath(header2)), scprep.io.utils._parse_header, header2, 50) + +def test_download_google_drive(): + id = "1_T5bRqbid5mtuDYnyusoGvujc6fW1UKv" + dest = "test.txt" + scprep.io.download.download_google_drive(id, dest) + assert os.path.isfile(dest) + with open(dest, 'r') as f: + data = f.read() + assert data == 'test\n', data + os.remove(dest) + +def test_download_google_drive_large(): + id = "1FDDSWtSZcdQUVKpk-mPCZ8Ji1Fx8KSz9" + response = scprep.io.download._GET_google_drive(id) + assert response.status_code == 200 + response.close() + +def test_download_url(): + X = data.load_10X() + scprep.io.download.download_url("https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X/matrix.mtx.gz", "url_test.mtx.gz") + Y = scprep.io.load_mtx("url_test.mtx.gz").T + assert (X.sparse.to_coo() - Y).nnz == 0 + os.remove("url_test.mtx.gz") + +def test_download_zip(): + X = data.load_10X() + scprep.io.download.download_and_extract_zip("https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X.zip", "zip_test") + Y = scprep.io.load_10X("zip_test/test_10X") + assert np.all(X == Y) + assert np.all(X.index == Y.index) + assert np.all(X.columns == Y.columns) + shutil.rmtree("zip_test") + +def test_unzip_no_destination(): + X = data.load_10X() + filename = os.path.join(data.data_dir, "test_10X.zip") + tmp_filename = os.path.join("zip_test", "zip_extract_test.zip") + os.mkdir("zip_test") + shutil.copyfile(filename, tmp_filename) + scprep.io.download.unzip(tmp_filename, delete=False) + assert os.path.isfile(tmp_filename) + Y = scprep.io.load_10X("zip_test/test_10X") + assert np.all(X == Y) + assert np.all(X.index == Y.index) + assert np.all(X.columns == Y.columns) + shutil.rmtree("zip_test") + +def test_unzip_destination(): + X = data.load_10X() + filename = os.path.join(data.data_dir, "test_10X.zip") + tmp_filename = "zip_extract_test.zip" + shutil.copyfile(filename, tmp_filename) + scprep.io.download.unzip(tmp_filename, destination="zip_test") + assert not os.path.isfile(tmp_filename) + Y = scprep.io.load_10X("zip_test/test_10X") + assert np.all(X == Y) + assert np.all(X.index == Y.index) + assert np.all(X.columns == Y.columns) + shutil.rmtree("zip_test") \ No newline at end of file diff --git a/test/test_measure.py b/test/test_measure.py index 1f54fd69..e54e61f1 100644 --- a/test/test_measure.py +++ b/test/test_measure.py @@ -51,3 +51,27 @@ def test_array_all(self): self.X_dense, utils.assert_transform_equals, Y=self.Y, transform=scprep.measure.gene_set_expression, genes=[0]) + + def test_library_size(self): + def test_fun(X): + x = scprep.measure.library_size(X) + assert x.name == 'library_size' + assert np.all(x.index == self.X_dense.index) + matrix.test_pandas_matrix_types( + self.X_dense, test_fun) + + def test_gene_set_expression(self): + def test_fun(X): + x = scprep.measure.gene_set_expression(X, genes=[0, 1]) + assert x.name == 'expression' + assert np.all(x.index == self.X_dense.index) + matrix.test_pandas_matrix_types( + self.X_dense, test_fun) + + def test_variable_genes(self): + def test_fun(X): + x = scprep.measure.gene_variability(X) + assert x.name == 'variability' + assert np.all(x.index == self.X_dense.columns) + matrix.test_pandas_matrix_types( + self.X_dense, test_fun) diff --git a/test/test_normalize.py b/test/test_normalize.py index 60c4c341..29e24d42 100644 --- a/test/test_normalize.py +++ b/test/test_normalize.py @@ -23,12 +23,13 @@ def test_libsize_norm_rescale_median(self): Y = self.X_norm * self.median 
utils.assert_all_close(Y.sum(1), np.median(np.sum(self.X, 1))) Y2, libsize2 = scprep.normalize.library_size_normalize( - self.X, return_library_size=True) + self.X, rescale='median', return_library_size=True) np.testing.assert_allclose(Y, Y2) np.testing.assert_allclose(self.libsize, libsize2) matrix.test_all_matrix_types( self.X, utils.assert_transform_equivalent, Y=Y, transform=scprep.normalize.library_size_normalize, + rescale='median', check=utils.assert_all_close) def test_libsize_norm_return_libsize(self): diff --git a/test/test_patch.py b/test/test_patch.py index 86f99543..150a92c7 100644 --- a/test/test_patch.py +++ b/test/test_patch.py @@ -1,6 +1,7 @@ import scprep import numpy as np import pandas as pd +from pandas.core.internals.blocks import ExtensionBlock def test_pandas_series_rmatmul(): @@ -9,3 +10,19 @@ def test_pandas_series_rmatmul(): df = pd.DataFrame(mat) ser = pd.Series(arr) np.testing.assert_array_equal(mat @ ser, (df @ ser).values) + +def test_pandas_sparse_iloc(): + X = pd.DataFrame([[0,1,1], [0,0,1], [0,0,0]]).astype(pd.SparseDtype(float, fill_value=0.0)) + assert np.all(~np.isnan(X.iloc[[0,1]].to_numpy())) + + +class CustomBlock(ExtensionBlock): + _holder = np.ndarray + +def test_fill_value(): + values = pd.Series(np.arange(3), dtype=pd.UInt16Dtype()) + custom_block = CustomBlock(values, placement=slice(1, 2)) + assert np.isnan(custom_block.fill_value) + values = pd.Series(np.arange(3), dtype=pd.SparseDtype(float, 0.0)) + custom_block = CustomBlock(values, placement=slice(1, 2)) + assert not np.isnan(custom_block.fill_value) diff --git a/test/test_plot.py b/test/test_plot.py index 4b3c34be..4f56d919 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -2,7 +2,9 @@ import matplotlib import matplotlib.pyplot as plt import numpy as np +import pandas as pd import os +import numbers from sklearn.utils.testing import assert_raise_message, assert_warns_message import unittest import scprep @@ -89,10 +91,6 @@ def test_tab30(): 10, 12, 13, 14, 16, 17, 18]]) -def test_is_color_array_none(): - assert not scprep.plot.utils._is_color_array(None) - - def test_tab40(): cmap = scprep.plot.colors.tab40() np.testing.assert_array_equal( @@ -142,6 +140,106 @@ def test_tab10_continuous_invalid_n_colors(): n_step=1) +def test_tab_exact(): + assert scprep.plot.colors.tab(1) is plt.cm.tab10 + np.testing.assert_array_equal( + scprep.plot.colors.tab(10).colors, plt.cm.tab10.colors) + np.testing.assert_array_equal( + scprep.plot.colors.tab(20).colors, plt.cm.tab20.colors) + np.testing.assert_array_equal( + scprep.plot.colors.tab(30).colors, scprep.plot.colors.tab30().colors) + np.testing.assert_array_equal( + scprep.plot.colors.tab(40).colors, scprep.plot.colors.tab40().colors) + np.testing.assert_array_equal( + scprep.plot.colors.tab(50).colors, + scprep.plot.colors.tab10_continuous(n_colors=10, n_step=5).colors) + + +def test_tab_first10(): + np.testing.assert_array_equal( + scprep.plot.colors.tab(19).colors[:10], plt.cm.tab10.colors) + np.testing.assert_array_equal( + scprep.plot.colors.tab(29).colors[:10], + scprep.plot.colors.tab30().colors[::3]) + np.testing.assert_array_equal( + scprep.plot.colors.tab(39).colors[:10], + scprep.plot.colors.tab40().colors[::4]) + np.testing.assert_array_equal( + scprep.plot.colors.tab(49).colors[:10], + scprep.plot.colors.tab10_continuous( + n_colors=10, n_step=5).colors[::5]) + + +def test_tab_first20(): + np.testing.assert_array_equal( + scprep.plot.colors.tab(29).colors[10:20], + scprep.plot.colors.tab30().colors[1::3]) + 
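+ # tab30 and tab40 appear to interleave 3 and 4 shades per hue respectively, so slicing [1::3] or [1::4] recovers the second shade of each of the 10 base hues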
np.testing.assert_array_equal( + scprep.plot.colors.tab(39).colors[10:20], + scprep.plot.colors.tab40().colors[1::4]) + + +def test_tab_first30(): + np.testing.assert_array_equal( + scprep.plot.colors.tab(39).colors[20:30], + scprep.plot.colors.tab40().colors[2::4]) + + +def test_tab_overhang(): + np.testing.assert_array_equal( + scprep.plot.colors.tab(9).colors, plt.cm.tab10.colors[:9]) + np.testing.assert_array_equal( + scprep.plot.colors.tab(19).colors[10:], plt.cm.tab20.colors[1:-1:2]) + np.testing.assert_array_equal( + scprep.plot.colors.tab(29).colors[20:], + scprep.plot.colors.tab30().colors[2:-1:3]) + np.testing.assert_array_equal( + scprep.plot.colors.tab(39).colors[30:], + scprep.plot.colors.tab40().colors[3:-1:4]) + np.testing.assert_array_equal( + scprep.plot.colors.tab(49).colors[40:], + scprep.plot.colors.tab10_continuous( + n_colors=10, n_step=5).colors[4:-1:5]) + + +def test_tab_invalid(): + assert_raise_message( + ValueError, + "Expected n >= 1. Got 0", + scprep.plot.colors.tab, + n=0) + + +def test_is_color_array_none(): + assert not scprep.plot.utils._is_color_array(None) + + +def test_histogram_log_negative_min(): + assert_warns_message( + UserWarning, + "Expected positive data for log = x. Got min(data) = -1.00", + scprep.plot.histogram, + [-1, 1, 1, 1], log='x') + assert_warns_message( + UserWarning, + "Expected positive data for log = True. Got min(data) = -1.00", + scprep.plot.histogram, + [-1, 1, 1, 1], log=True) + + +def test_histogram_log_negative_max(): + assert_raise_message( + ValueError, + "Expected positive data for log = x. Got max(data) = -1.00", + scprep.plot.histogram, + [-1, -1, -1, -2], log='x') + assert_raise_message( + ValueError, + "Expected positive data for log = True. Got max(data) = -1.00", + scprep.plot.histogram, + [-1, -1, -1, -2], log=True) + + class TestScatterParams(unittest.TestCase): @classmethod @@ -180,6 +278,27 @@ def test_plot_idx_no_shuffle(self): np.testing.assert_equal(params.c, self.c) np.testing.assert_equal(params.s, np.abs(self.x)) + def test_plot_idx_mask(self): + params = _ScatterParams(x=self.x, y=self.y, + z=self.z, c=self.c, + mask=self.x > 0, shuffle=False) + np.testing.assert_equal(params.plot_idx, np.arange(params.size)[self.x > 0]) + np.testing.assert_equal(params.x, self.x[self.x > 0]) + np.testing.assert_equal(params.y, self.y[self.x > 0]) + np.testing.assert_equal(params.z, self.z[self.x > 0]) + np.testing.assert_equal(params.c, self.c[self.x > 0]) + + def test_plot_idx_mask_shuffle(self): + params = _ScatterParams(x=self.x, y=self.y, + mask=self.x > 0, shuffle=True) + np.testing.assert_equal(np.sort(params.plot_idx), np.arange(params.size)[self.x > 0]) + assert np.all(params.x > 0) + + def test_data_int(self): + params = _ScatterParams(x=1, y=2) + np.testing.assert_equal(params._data, [np.array([1]), np.array([2])]) + assert params.subplot_kw == {} + def test_data_2d(self): params = _ScatterParams(x=self.x, y=self.y) np.testing.assert_equal(params._data, [self.x, @@ -301,8 +420,11 @@ def test_discrete_tab20(self): assert params.extend is None assert isinstance(params.cmap, matplotlib.colors.ListedColormap) np.testing.assert_equal( - params.cmap.colors, - plt.cm.tab20.colors[:len(np.unique(np.round(self.c % 1, 1)))]) + params.cmap.colors[:10], + plt.cm.tab10.colors) + np.testing.assert_equal( + params.cmap.colors[10:], + plt.cm.tab20.colors[1:1 + (len(params.cmap.colors) - 10) * 2:2]) def test_continuous_less_than_20(self): params = _ScatterParams(x=self.x, y=self.y, @@ -581,6 +703,38 @@ def 
test_check_cmap_scale(self): c=np.where(self.c > 0, '+', '-'), ) + def test_series_labels(self): + params = _ScatterParams(x=pd.Series(self.x, name='x'), y=self.y, c=self.c) + assert params.xlabel == 'x' + assert params.ylabel is None + assert params.zlabel is None + params = _ScatterParams(x=self.x, y=pd.Series(self.y, name='y'), c=self.c) + assert params.xlabel is None + assert params.ylabel == 'y' + assert params.zlabel is None + params = _ScatterParams(x=self.x, y=self.y, z=pd.Series(self.y, name='z'), c=self.c) + assert params.xlabel is None + assert params.ylabel is None + assert params.zlabel == 'z' + # xlabel overrides series + params = _ScatterParams(x=pd.Series(self.x, name='x'), y=self.y, c=self.c, + xlabel='y') + assert params.xlabel == 'y' + assert params.ylabel is None + assert params.zlabel is None + # label_prefix overrides series + params = _ScatterParams(x=pd.Series(self.x, name='x'), y=self.y, c=self.c, + label_prefix='y') + assert params.xlabel == 'y1' + assert params.ylabel == 'y2' + assert params.zlabel is None + # xlabel overrides label_prefix + params = _ScatterParams(x=pd.Series(self.x, name='x'), y=self.y, z=self.y, c=self.c, + label_prefix='y', xlabel='test') + assert params.xlabel == 'test' + assert params.ylabel == 'y2' + assert params.zlabel == 'y3' + def test_jitter_x(self): params = _JitterParams(x=np.where(self.x > 0, '+', '-'), y=self.y) np.testing.assert_array_equal(params.x_labels, ['+', '-']) @@ -593,7 +747,9 @@ class Test10X(unittest.TestCase): @classmethod def setUpClass(self): self.X = data.load_10X(sparse=False) - self.X_pca, self.S = scprep.reduce.pca(self.X, n_components=10, + self.X_filt = scprep.filter.filter_empty_cells(self.X) + self.X_pca, self.S = scprep.reduce.pca(scprep.utils.toarray(self.X), + n_components=10, return_singular_values=True) @classmethod @@ -602,20 +758,25 @@ def tearDownClass(self): try_remove("test.gif") try_remove("test.mp4") try_remove("test_jitter.png") + try_remove("test_histogram.png") + try_remove("test_library_size.png") + try_remove("test_variable_genes.png") + try_remove("test_gene_expression.png") + try_remove("test_scree.png") def tearDown(self): plt.close('all') def test_histogram(self): - scprep.plot.plot_library_size(self.X, cutoff=1000, log=True) - scprep.plot.plot_library_size(self.X, cutoff=1000, log=True, + scprep.plot.plot_library_size(self.X_filt, cutoff=1000, log=True) + scprep.plot.plot_library_size(self.X_filt, cutoff=1000, log=True, xlabel="x label", ylabel="y label") def test_histogram_list_of_lists(self): - scprep.plot.plot_library_size(scprep.utils.toarray(self.X).tolist()) + scprep.plot.plot_library_size(scprep.utils.toarray(self.X_filt).tolist()) def test_histogram_array(self): - scprep.plot.plot_library_size(scprep.utils.toarray(self.X)) + scprep.plot.plot_library_size(scprep.utils.toarray(self.X_filt)) def test_histogram_multiple(self): scprep.plot.histogram([scprep.select.select_rows(self.X, idx=0), @@ -623,16 +784,18 @@ def test_histogram_multiple(self): color=['r', 'b']) def test_histogram_multiple_cutoff(self): - scprep.plot.plot_library_size(self.X, cutoff=[500, 1000], log=True) + scprep.plot.plot_library_size(self.X_filt, cutoff=[500, 1000], log=True) def test_histogram_multiple_percentile(self): - scprep.plot.plot_library_size(self.X, percentile=[10, 90], log=True) + scprep.plot.plot_library_size(self.X_filt, percentile=[10, 90], log=True) def test_plot_library_size_multiple(self): scprep.plot.plot_library_size([ - self.X, scprep.select.select_rows( - self.X, 
idx=np.arange(self.X.shape[0] // 2))], - color=['r', 'b']) + self.X_filt, scprep.select.select_rows( + self.X_filt, idx=np.arange(self.X_filt.shape[0] // 2))], + color=['r', 'b'], + filename="test_library_size.png") + assert os.path.exists("test_library_size.png") def test_plot_gene_set_expression_multiple(self): scprep.plot.plot_gene_set_expression([ @@ -652,7 +815,19 @@ def test_gene_set_expression_array(self): def test_plot_gene_set_expression_single_gene(self): scprep.plot.plot_gene_set_expression( self.X, color=["red"], - genes="Arl8b") + genes="Arl8b", + filename="test_gene_expression.png") + assert os.path.exists("test_gene_expression.png") + + def test_plot_variable_genes(self): + scprep.plot.plot_gene_variability( + self.X, + filename="test_variable_genes.png") + assert os.path.exists("test_variable_genes.png") + + def test_variable_genes_list_of_lists(self): + scprep.plot.plot_gene_variability( + scprep.utils.toarray(self.X).tolist()) def test_histogram_single_gene_dataframe(self): scprep.plot.histogram( @@ -668,7 +843,10 @@ def test_histogram_custom_axis(self): fig, ax = plt.subplots() scprep.plot.plot_gene_set_expression( self.X, genes=scprep.select.get_gene_set(self.X, starts_with="D"), - percentile=90, log='y', ax=ax, title="histogram") + percentile=90, log='y', ax=ax, title="histogram", + filename="test_histogram.png") + assert os.path.exists("test_histogram.png") + assert ax.get_title() == 'histogram' def test_histogram_invalid_axis(self): assert_raise_message( @@ -678,13 +856,17 @@ def test_histogram_invalid_axis(self): self.X, ax="invalid") def test_scree(self): - scprep.plot.scree_plot(self.S) - scprep.plot.scree_plot(self.S, cumulative=True, - xlabel="x label", ylabel="y label") + ax = scprep.plot.scree_plot(self.S) + assert all([t == int(t) for t in ax.get_xticks()]), ax.get_xticks() + ax = scprep.plot.scree_plot(self.S, cumulative=True, + xlabel="x label", ylabel="y label", filename="test_scree.png") + assert all([t == int(t) for t in ax.get_xticks()]), ax.get_xticks() + assert os.path.isfile("test_scree.png") def test_scree_custom_axis(self): fig, ax = plt.subplots() scprep.plot.scree_plot(self.S, ax=ax) + assert all([t == int(t) for t in ax.get_xticks()]), ax.get_xticks() def test_scree_invalid_axis(self): assert_raise_message( @@ -724,6 +906,18 @@ def test_jitter_continuous(self): assert ax.get_xlim() == (-0.5, 1.5) assert [t.get_text() for t in ax.get_xticklabels()] == ['+', '-'] + def test_jitter_axis_labels(self): + ax = scprep.plot.jitter(np.where(self.X_pca[:, 0] > 0, '+', '-'), + self.X_pca[:, 1], + xlabel="test") + assert ax.get_xlabel() == "test" + assert ax.get_ylabel() == '' + ax = scprep.plot.jitter( + pd.Series(np.where(self.X_pca[:, 0] > 0, '+', '-'), name='x'), + pd.Series(self.X_pca[:, 1], name='y'), ylabel="override") + assert ax.get_xlabel() == "x" + assert ax.get_ylabel() == "override" + def test_scatter_dict(self): scprep.plot.scatter2d(self.X_pca, c=np.random.choice( ['hello', 'world'], self.X_pca.shape[0], replace=True), @@ -810,8 +1004,12 @@ def test_scatter_custom_ticklabels(self): assert np.all(xticklabels == np.array(['a', 'b', 'c'])) def test_scatter_axis_labels(self): + ax = scprep.plot.scatter2d( + self.X_pca.tolist(), label_prefix="test") + assert ax.get_xlabel() == "test1" + assert ax.get_ylabel() == "test2" ax = scprep.plot.scatter3d( - self.X_pca, label_prefix="test") + self.X_pca.tolist(), label_prefix="test") assert ax.get_xlabel() == "test1" assert ax.get_ylabel() == "test2" assert ax.get_zlabel() == "test3" @@ -819,6 +1017,20 @@ 
def test_scatter_axis_labels(self): self.X_pca, label_prefix="test", xlabel="override") assert ax.get_xlabel() == "override" assert ax.get_ylabel() == "test2" + ax = scprep.plot.scatter( + x=self.X_pca[:,0], y=pd.Series(self.X_pca[:,1], name='y'), + z=pd.Series(self.X_pca[:,2], name='z'), + ylabel='override') + assert ax.get_xlabel() == '' + assert ax.get_ylabel() == "override" + assert ax.get_zlabel() == "z" + ax = scprep.plot.scatter( + x=self.X_pca[:,0], y=pd.Series(self.X_pca[:,1], name='y'), + z=pd.Series(self.X_pca[:,2], name='z'), + zlabel='override') + assert ax.get_xlabel() == '' + assert ax.get_ylabel() == "y" + assert ax.get_zlabel() == "override" def test_scatter_axis_savefig(self): scprep.plot.scatter2d( @@ -831,6 +1043,20 @@ def test_scatter_viewinit(self): assert ax.elev == 80 assert ax.azim == 270 + def test_scatter3d_data_2d(self): + assert_raise_message( + ValueError, + "Expected data.shape[1] >= 3. Got 2", + scprep.plot.scatter3d, + self.X_pca[:,:2]) + + def test_scatter3d_data_2d_list(self): + assert_raise_message( + ValueError, + "Expected data.shape[1] >= 3. Got 2", + scprep.plot.scatter3d, + self.X_pca[:,:2].tolist()) + def test_scatter_rotate_gif(self): scprep.plot.rotate_scatter3d(self.X_pca, fps=3, dpi=20, filename="test.gif") @@ -874,6 +1100,13 @@ def test_scatter_invalid_s(self): scprep.plot.scatter2d, self.X_pca, s=self.X_pca[0, :]) + def test_scatter_invalid_mask(self): + assert_raise_message( + ValueError, "Expected mask of length {}. Got {}".format( + self.X_pca.shape[0], self.X_pca.shape[1]), + scprep.plot.scatter2d, self.X_pca, + mask=self.X_pca[0, :] > 0) + def test_scatter_invalid_discrete(self): assert_raise_message( ValueError, "Cannot treat non-numeric data as continuous", @@ -1080,7 +1313,7 @@ def test_marker_plot_no_gene_names(self): "be provided. 
" "Got gene_names=None, data as a ", scprep.plot.marker_plot, - data=self.X.values, + data=self.X.to_numpy(), clusters=np.random.choice( np.arange(10), replace=True, size=self.X.shape[0]), markers={'tissue': ['z']}) diff --git a/test/test_reduce.py b/test/test_reduce.py index c5620904..94179d16 100644 --- a/test/test_reduce.py +++ b/test/test_reduce.py @@ -1,6 +1,8 @@ from tools import utils, matrix, data import scprep from scipy import sparse +import numpy as np +import pandas as pd from sklearn import decomposition from sklearn.utils.testing import assert_raise_message, assert_warns_message from functools import partial @@ -25,6 +27,11 @@ def test_dense(self): self.X, utils.assert_transform_equals, Y=self.Y_random, transform=scprep.reduce.pca, n_components=100, seed=42) + matrix.test_all_matrix_types( + self.X, utils.assert_transform_equals, + Y=self.Y_random, transform=scprep.reduce.pca, + n_components=100, seed=42, method='dense', + check=partial(utils.assert_all_close, atol=1e-10)) def test_sparse_svd(self): matrix.test_sparse_matrix_types( @@ -33,11 +40,25 @@ def test_sparse_svd(self): check=partial(utils.assert_all_close, rtol=1e-3, atol=1e-5), n_components=50, eps=0.3, seed=42, method='svd') + def test_pandas(self): + X = pd.DataFrame(self.X, index=np.arange(self.X.shape[0]).astype(str), + columns=np.arange(self.X.shape[1]).astype(float)) + def test_fun(X_pd): + Y = scprep.reduce.pca(X_pd, n_components=100, seed=42) + assert isinstance(Y, pd.DataFrame) + assert np.all(Y.index == X.index) + assert np.all(Y.columns == np.array(['PC{}'.format(i+1) + for i in range(Y.shape[1])])) + matrix.test_pandas_matrix_types( + X, test_fun) + def test_sparse_orth_rproj(self): + def test_fn(*args, **kwargs): + return scprep.utils.toarray(scprep.reduce.pca(*args, **kwargs)) matrix.test_sparse_matrix_types( self.X, utils.assert_transform_equals, check=utils.assert_matrix_class_equivalent, - Y=self.Y_full, transform=scprep.reduce.pca, + Y=self.Y_full, transform=test_fn, n_components=50, eps=0.3, seed=42, method='orth_rproj') def test_singular_values_dense(self): @@ -53,10 +74,12 @@ def test_singular_values_sparse(self): eps=0.3, seed=42, return_singular_values=True)[1], atol=1e-14) def test_sparse_rproj(self): + def test_fn(*args, **kwargs): + return scprep.utils.toarray(scprep.reduce.pca(*args, **kwargs)) matrix.test_sparse_matrix_types( self.X, utils.assert_transform_equals, check=utils.assert_matrix_class_equivalent, - Y=self.Y_full, transform=scprep.reduce.pca, + Y=self.Y_full, transform=test_fn, n_components=50, eps=0.3, seed=42, method='rproj') def test_eps_too_low(self): diff --git a/test/test_run.py b/test/test_run.py index f0f2a66a..c191d94d 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -1,9 +1,13 @@ from tools import utils, matrix, data import numpy as np +import pandas as pd import scprep import scprep.run.r_function import unittest +import sklearn.cluster import rpy2.rinterface_lib.callbacks +import rpy2.rinterface_lib.embedded +from sklearn.utils.testing import assert_raise_message, assert_warns_message builtin_warning = rpy2.rinterface_lib.callbacks.consolewrite_warnerror @@ -15,7 +19,20 @@ def test_verbose(): assert np.all(fun() == np.array([[1], [2], [3]])) -class TestRFunctions(unittest.TestCase): +def test_install_bioc(): + assert_raise_message( + rpy2.rinterface_lib.embedded.RRuntimeError, + "Error: Bioconductor version '3.1' requires R version '3.2'; see", + scprep.run.install_bioconductor, + version='3.1', site_repository='https://bioconductor.org/packages/3.1/bioc', + 
verbose=False) + + +class TestSplatter(unittest.TestCase): + + @classmethod + def setUpClass(self): + scprep.run.splatter.install(verbose=False) def test_splatter_default(self): sim = scprep.run.SplatSimulate( @@ -176,3 +193,109 @@ def test_splatter_warning(self): scprep.run.r_function._ConsoleWarning.set_builtin() assert rpy2.rinterface_lib.callbacks.consolewrite_warnerror is \ builtin_warning + + +class TestSlingshot(unittest.TestCase): + + @classmethod + def setUpClass(self): + scprep.run.slingshot.install(verbose=False) + self.X = data.load_10X() + self.X_pca = scprep.reduce.pca(self.X) + self.clusters = sklearn.cluster.KMeans(6).fit_predict(self.X_pca) + + def test_slingshot(self): + slingshot = scprep.run.Slingshot(self.X_pca[:,:2], self.clusters, verbose=False) + pseudotime, branch, curves = slingshot['pseudotime'], slingshot['branch'], slingshot['curves'] + assert pseudotime.shape[0] == self.X_pca.shape[0] + assert pseudotime.shape[1] == curves.shape[0] + assert branch.shape[0] == self.X_pca.shape[0] + current_pseudotime = -1 + for i in np.unique(branch): + branch_membership = np.isnan(pseudotime[branch==i]) + assert np.all(branch_membership == branch_membership[0]) + new_pseudotime = np.nanmean(pseudotime[branch==i]) + assert new_pseudotime > current_pseudotime + current_pseudotime = new_pseudotime + assert curves.shape[1] == self.X_pca.shape[0] + assert curves.shape[2] == 2 + assert np.all(np.any(~np.isnan(pseudotime), axis=1)) + + def test_slingshot_pandas(self): + slingshot = scprep.run.Slingshot(pd.DataFrame(self.X_pca[:,:2], index=self.X.index), + self.clusters, verbose=False) + pseudotime, branch, curves = slingshot['pseudotime'], slingshot['branch'], slingshot['curves'] + assert np.all(pseudotime.index == self.X.index) + assert np.all(branch.index == self.X.index) + assert branch.name == 'branch' + assert pseudotime.shape[0] == self.X_pca.shape[0] + assert pseudotime.shape[1] == curves.shape[0] + assert branch.shape[0] == self.X_pca.shape[0] + current_pseudotime = -1 + for i in np.unique(branch): + branch_membership = np.isnan(pseudotime.loc[branch==i]) + assert np.all(branch_membership == branch_membership.iloc[0]) + new_pseudotime = np.nanmean(np.nanmean(pseudotime.loc[branch==i])) + assert new_pseudotime > current_pseudotime + current_pseudotime = new_pseudotime + assert curves.shape[1] == self.X_pca.shape[0] + assert curves.shape[2] == 2 + assert np.all(np.any(~np.isnan(pseudotime), axis=1)) + + def test_slingshot_distance(self): + assert_raise_message( + NotImplementedError, + "distance argument not currently implemented", + scprep.run.Slingshot, + self.X_pca, self.clusters, distance=lambda X, Y : np.sum(X-Y)) + + def test_slingshot_optional_args(self): + slingshot = scprep.run.Slingshot(self.X_pca[:,:2], self.clusters, + start_cluster=4, omega=0.1, verbose=False) + pseudotime, branch, curves = slingshot['pseudotime'], slingshot['branch'], slingshot['curves'] + assert pseudotime.shape[0] == self.X_pca.shape[0] + assert pseudotime.shape[1] == curves.shape[0] + assert branch.shape[0] == self.X_pca.shape[0] + current_pseudotime = -1 + for i in np.unique(branch): + branch_membership = np.isnan(pseudotime[branch==i]) + assert np.all(branch_membership == branch_membership[0]) + if np.all(np.isnan(pseudotime[branch==i])): + assert i == -1 + else: + new_pseudotime = np.nanmean(pseudotime[branch==i]) + assert new_pseudotime > current_pseudotime + current_pseudotime = new_pseudotime + assert curves.shape[1] == self.X_pca.shape[0] + assert curves.shape[2] == 2 + slingshot = 
scprep.run.Slingshot(self.X_pca[:,:2], self.clusters, + end_cluster=0, verbose=False) + pseudotime, branch, curves = slingshot['pseudotime'], slingshot['branch'], slingshot['curves'] + assert pseudotime.shape[0] == self.X_pca.shape[0] + assert pseudotime.shape[1] == curves.shape[0] + assert branch.shape[0] == self.X_pca.shape[0] + current_pseudotime = -1 + for i in np.unique(branch): + branch_membership = np.isnan(pseudotime[branch==i]) + assert np.all(branch_membership == branch_membership[0]) + new_pseudotime = np.nanmean(pseudotime[branch==i]) + assert new_pseudotime > current_pseudotime + current_pseudotime = new_pseudotime + assert curves.shape[1] == self.X_pca.shape[0] + assert curves.shape[2] == 2 + assert np.all(np.any(~np.isnan(pseudotime), axis=1)) + + def test_slingshot_errors(self): + assert_warns_message( + UserWarning, + "Expected data to be low-dimensional. " + "Got data.shape[1] = 4", + scprep.run.Slingshot, + self.X_pca[:, :4], self.clusters, verbose=False) + assert_raise_message( + ValueError, + "Expected len(cluster_labels) ({}) to equal " + "data.shape[0] ({})".format( + self.X.shape[0]//2, self.X.shape[0]), + scprep.run.Slingshot, + self.X_pca[:, :2], self.clusters[:self.X.shape[0]//2], verbose=False) diff --git a/test/test_sanitize.py b/test/test_sanitize.py index 9ed4ef30..9c46f611 100644 --- a/test/test_sanitize.py +++ b/test/test_sanitize.py @@ -20,15 +20,18 @@ def test_check_numeric_inplace(): utils.assert_transform_unchanged, matrix._scipy_matrix_types + matrix._numpy_matrix_types + - matrix._pandas_dense_matrix_types, + matrix._pandas_dense_matrix_types + + [matrix.SparseDataFrame], transform=scprep.sanitize.check_numeric, copy=False) + matrix._ignore_pandas_sparse_warning() assert_raise_message( TypeError, "pd.SparseDataFrame does not support " "copy=False. Please use copy=True.", scprep.sanitize.check_numeric, - data=X, copy=False) + data=matrix.SparseDataFrame_deprecated(X), copy=False) + matrix._reset_warnings() class TypeErrorClass(object): diff --git a/test/test_select.py b/test/test_select.py index e05738e3..ff05f652 100644 --- a/test/test_select.py +++ b/test/test_select.py @@ -41,7 +41,7 @@ def test_get_gene_set_ndarray(self): "data must be a list of gene names or a pandas " "DataFrame. Got ndarray", scprep.select.get_gene_set, - data=self.X.values, regex="8$") + data=self.X.to_numpy(), regex="8$") def test_get_gene_set_no_condition(self): assert_warns_message( @@ -75,7 +75,7 @@ def test_get_cell_set_ndarray(self): "data must be a list of cell names or a pandas " "DataFrame. 
Got ndarray", scprep.select.get_cell_set, - data=self.X.values, regex="G\\-1$") + data=self.X.to_numpy(), regex="G\\-1$") def test_get_cell_set_no_condition(self): assert_warns_message( @@ -106,7 +106,7 @@ def test_select_rows_integer_index(self): def test_select_rows_string_array_index(self): matrix.test_pandas_matrix_types( self.X, scprep.select.select_rows, - idx=np.random.choice(self.X.index.values, self.X.shape[0] // 2)) + idx=np.random.choice(self.X.index.to_numpy(), self.X.shape[0] // 2)) def test_select_rows_pandas_index_index(self): matrix.test_pandas_matrix_types( @@ -142,11 +142,11 @@ def test_select_rows_sparse_series_data_integer_index(self): def test_select_rows_1d_array_data(self): scprep.select.select_rows( - self.X, self.X.values[:, 0], idx=np.random.choice([True, False], [self.X.shape[0]])) + self.X, self.X.to_numpy()[:, 0], idx=np.random.choice([True, False], [self.X.shape[0]])) def test_select_rows_list_data(self): scprep.select.select_rows( - self.X, self.X.values[:, 0].tolist(), idx=np.random.choice([True, False], [self.X.shape[1]])) + self.X, self.X.to_numpy()[:, 0].tolist(), idx=np.random.choice([True, False], [self.X.shape[0]])) def test_select_rows_get_cell_set(self): matrix.test_pandas_matrix_types( @@ -189,7 +189,7 @@ def test_select_cols_integer_index(self): def test_select_cols_string_array_index(self): matrix.test_pandas_matrix_types( self.X, scprep.select.select_cols, - idx=np.random.choice(self.X.columns.values, self.X.shape[1] // 2)) + idx=np.random.choice(self.X.columns.to_numpy(), self.X.shape[1] // 2)) def test_select_cols_pandas_index_index(self): matrix.test_pandas_matrix_types( @@ -241,11 +241,11 @@ def test_select_cols_sparse_series_data_integer_index(self): def test_select_cols_1d_array_data(self): scprep.select.select_cols( - self.X, self.X.values[0, :], idx=np.random.choice([True, False], [self.X.shape[1]])) + self.X, self.X.to_numpy()[0, :], idx=np.random.choice([True, False], [self.X.shape[1]])) def test_select_cols_list_data(self): scprep.select.select_cols( - self.X, self.X.values[0, :].tolist(), idx=np.random.choice([True, False], [self.X.shape[1]])) + self.X, self.X.to_numpy()[0, :].tolist(), idx=np.random.choice([True, False], [self.X.shape[1]])) def test_select_cols_get_gene_set(self): matrix.test_pandas_matrix_types( @@ -267,14 +267,14 @@ def test_select_cols_no_condition(self): def test_select_rows_invalid_index(self): assert_raise_message(KeyError, - "the label [not_a_cell] is not in the [index]", + "'not_a_cell'", scprep.select.select_rows, self.X, idx='not_a_cell') def test_select_cols_invalid_index(self): assert_raise_message(KeyError, - "the label [not_a_gene] is not in the [columns]", + "'not_a_gene'", scprep.select.select_cols, self.X, idx='not_a_gene') @@ -318,7 +318,7 @@ def test_select_cols_unequal_columns(self): "columns. Got [100, 50]", scprep.select.select_cols, self.X, - self.X.values[:, :50]) + self.X.to_numpy()[:, :50]) def test_select_rows_unequal_rows(self): assert_raise_message( @@ -327,34 +327,34 @@ def test_select_rows_unequal_rows(self): "rows. Got [100, 50]", scprep.select.select_rows, self.X, - self.X.values[:50, :]) + self.X.to_numpy()[:50, :]) def test_select_cols_conflicting_data(self): assert_raise_message( ValueError, "Expected `data` and `extra_data` pandas inputs to have the same " "column names. 
Fix with " - "`scprep.select.select_cols(*extra_data, data.columns)`", + "`scprep.select.select_cols(*extra_data, idx=data.columns)`", scprep.select.select_cols, self.X, - scprep.select.subsample(self.X.T, n=self.X.shape[0]).T) + self.X.iloc[:,::-1]) def test_select_rows_conflicting_data(self): assert_raise_message( ValueError, "Expected `data` and `extra_data` pandas inputs to have the same " "index. Fix with " - "`scprep.select.select_rows(*extra_data, data.index)`", + "`scprep.select.select_rows(*extra_data, idx=data.index)`", scprep.select.select_rows, self.X, - scprep.select.subsample(self.X, n=self.X.shape[0])) + self.X.iloc[::-1]) def test_select_cols_get_gene_set_ndarray_data(self): assert_raise_message( ValueError, "Can only select based on column names with DataFrame input. " "Please set `idx` to select specific columns.", - scprep.select.select_cols, self.X.values, starts_with="A" + scprep.select.select_cols, self.X.to_numpy(), starts_with="A" ) def test_select_rows_get_cell_set_ndarray_data(self): @@ -362,7 +362,7 @@ def test_select_rows_get_cell_set_ndarray_data(self): ValueError, "Can only select based on row names with DataFrame input. " "Please set `idx` to select specific rows.", - scprep.select.select_rows, self.X.values, starts_with="A" + scprep.select.select_rows, self.X.to_numpy(), starts_with="A" ) def test_subsample(self): @@ -407,6 +407,27 @@ def test_subsample_n_too_large(self): "Expected n (101) <= n_samples (100)", scprep.select.subsample, self.X, n=self.X.shape[0] + 1) + def test_sparse_dataframe_fill_value(self): + def test_fun(X): + Y = scprep.select.select_rows(X, idx=np.arange(X.shape[0]//2)) + for col in Y.columns: + assert X[col].dtype == Y[col].dtype, (X[col].dtype, Y[col].dtype) + Y = scprep.select.select_cols(X, idx=np.arange(X.shape[1]//2)) + for col in Y.columns: + assert X[col].dtype == Y[col].dtype, (X[col].dtype, Y[col].dtype) + matrix.test_matrix_types( + self.X.astype(float), test_fun, matrix._pandas_sparse_matrix_types) + + def test_select_variable_genes(self): + X_filtered = scprep.select.highly_variable_genes(self.X, percentile=70) + assert X_filtered.shape[0] == self.X.shape[0] + assert X_filtered.shape[1] <= 30 + assert X_filtered.shape[1] >= 20 + assert self.X.columns[np.argmax(self.X.values.std(axis=0))] in X_filtered.columns + matrix.test_all_matrix_types( + self.X, utils.assert_transform_equals, + Y=X_filtered, transform=scprep.select.highly_variable_genes, percentile=70) + def test_string_subset_exact_word(): np.testing.assert_array_equal(scprep.select._get_string_subset_mask( @@ -427,6 +448,8 @@ def test_string_subset_exact_word(): ['World, hello!', 'world'], exact_word='hello'), [True, False]) np.testing.assert_array_equal(scprep.select._get_string_subset_mask( ['helloooo!', 'world'], exact_word='hello'), [False, False]) + np.testing.assert_array_equal(scprep.select._get_string_subset_mask( + ['(hello) world', 'world'], exact_word='(hello) world'), [True, False]) def test_string_subset_list(): diff --git a/test/test_stats.py b/test/test_stats.py index fdb7eff1..7294b74b 100644 --- a/test/test_stats.py +++ b/test/test_stats.py @@ -6,6 +6,8 @@ import scprep from functools import partial import warnings +import os +from parameterized import parameterized def _test_fun_2d(X, fun, **kwargs): @@ -97,6 +99,8 @@ def test_knnDREMI(): Y2, drevi = scprep.stats.knnDREMI(X[:, 0], X[:, 1], plot=True, filename="test.png", return_drevi=True) + assert os.path.isfile("test.png") + os.remove("test.png") assert Y2 == Y assert drevi.shape == (20, 20) 
matrix.test_all_matrix_types( @@ -121,3 +125,142 @@ def test_knnDREMI(): "Attempting to calculate kNN-DREMI on a constant array. " "Returning `0`", scprep.stats.knnDREMI, X[:, 0], np.zeros_like(X[:, 1])) + + +def test_mean_difference(): + X = data.load_10X() + X = scprep.filter.filter_empty_genes(X) + Y = scprep.stats.mean_difference(X.iloc[:20], X.iloc[20:100]) + assert np.allclose(np.max(Y), 16.8125) + assert np.allclose(np.min(Y), -0.5625) + def test_fun(X, **kwargs): + return scprep.stats.mean_difference( + scprep.select.select_rows(X, idx=np.arange(20)), + scprep.select.select_rows(X, idx=np.arange(20, 100)), + **kwargs) + matrix.test_all_matrix_types( + X, utils.assert_transform_equals, Y=Y, + transform=test_fun, + check=utils.assert_all_close) + assert_raise_message( + ValueError, + "Expected X and Y to have the same number of columns. " + "Got shapes {}, {}".format(X.shape, X.iloc[:,:10].shape), + scprep.stats.mean_difference, + X, X.iloc[:,:10]) + + +@parameterized([('difference', 'up'), ('difference', 'down'), ('difference', 'both'), + ('emd', 'up'), ('emd', 'down'), ('emd', 'both')]) +def test_differential_expression(measure, direction): + X = data.load_10X() + X = scprep.filter.filter_empty_genes(X) + result = scprep.stats.differential_expression(X.iloc[:20], X.iloc[20:100], + measure=measure, direction=direction) + expected_results = {('difference', 'up') : ('Gstm5', 16.8125), + ('difference', 'down') : ('Slc2a3', -0.5625), + ('difference', 'both') : ('Gstm5', 16.8125), + ('emd', 'up') : ('Gstm5', 17.5625), + ('emd', 'down') : ('Slc2a3', -0.6875), + ('emd', 'both') : ('Gstm5', 17.5625)} + assert result['gene'][0] == expected_results[(measure, direction)][0], result['gene'][0] + assert np.allclose(result[measure][0], + expected_results[(measure, direction)][1]) + result_unnamed = scprep.stats.differential_expression(X.iloc[:20].sparse.to_coo(), X.iloc[20:100].sparse.to_coo(), + measure=measure, direction=direction) + if direction != 'both': + values = result[measure] + else: + values = np.abs(result[measure]) + + unique_values = ~np.isin(values, values[values.duplicated()]) + assert np.all(X.columns[result_unnamed['gene']][unique_values] == result['gene'][unique_values]) + def test_fun(X, **kwargs): + return scprep.stats.differential_expression( + scprep.select.select_rows(X, idx=np.arange(20)), + scprep.select.select_rows(X, idx=np.arange(20, 100)), + **kwargs) + + def check_fun(Y1, Y2): + if direction == 'both': + Y1[measure] = np.abs(Y1[measure]) + Y2[measure] = np.abs(Y2[measure]) + np.testing.assert_allclose(Y1[measure], Y2[measure], atol=5e-4) + Y1 = Y1.sort_values('gene') + Y2 = Y2.sort_values('gene') + np.testing.assert_allclose(Y1[measure], Y2[measure], atol=5e-4) + + matrix.test_all_matrix_types( + X, utils.assert_transform_equals, Y=result, + transform=test_fun, + check=check_fun, + gene_names=X.columns, + measure=measure, direction=direction) + + +def test_differential_expression_error(): + X = data.load_10X() + assert_raise_message( + ValueError, "Expected `direction` in ['up', 'down', 'both']. " + "Got invalid", scprep.stats.differential_expression, + X, X, direction='invalid') + assert_raise_message( + ValueError, "Expected `measure` in ['difference', 'emd']. " + "Got invalid", scprep.stats.differential_expression, + X, X, measure='invalid') + assert_raise_message( + ValueError, "Expected `X` and `Y` to be matrices. 
" + "Got shapes {}, {}".format(X.shape, X.iloc[0].shape), + scprep.stats.differential_expression, + X, X.iloc[0]) + assert_raise_message( + ValueError, "Expected gene_names to have length {}. " + "Got {}".format(X.shape[0], X.shape[0]//2), + scprep.stats.differential_expression, + X.sparse.to_coo(), X.sparse.to_coo(), gene_names=np.arange(X.shape[0]//2)) + assert_raise_message( + ValueError, "Expected gene_names to have length {}. " + "Got {}".format(X.shape[0], X.shape[0]//2), + scprep.stats.differential_expression_by_cluster, + X.sparse.to_coo(), np.random.choice(2, X.shape[0], replace=True), + gene_names=np.arange(X.shape[0]//2)) + assert_warns_message( + UserWarning, "Input data has inconsistent column names. " + "Subsetting to 20 common columns.", + scprep.stats.differential_expression, + X, X.iloc[:,:20]) + + +def test_differential_expression_by_cluster(): + measure = 'difference' + direction = 'up' + X = data.load_10X() + np.random.seed(42) + clusters = np.random.choice(4, X.shape[0], replace=True) + result = scprep.stats.differential_expression_by_cluster( + X, clusters, + measure=measure, direction=direction) + for cluster in range(4): + r = scprep.stats.differential_expression( + scprep.select.select_rows(X, idx=clusters==cluster), + scprep.select.select_rows(X, idx=clusters!=cluster), + measure=measure, direction=direction) + assert np.all(result[cluster] == r) + + +def test_differential_expression_by_cluster_subset(): + measure = 'difference' + direction = 'up' + X = data.load_10X() + np.random.seed(42) + clusters = np.random.choice(4, X.shape[0], replace=True) + result = scprep.stats.differential_expression_by_cluster( + X, clusters, + measure=measure, direction=direction, gene_names=X.columns[:X.shape[0]//2]) + for cluster in range(4): + r = scprep.stats.differential_expression( + scprep.select.select_rows(X, idx=clusters==cluster), + scprep.select.select_rows(X, idx=clusters!=cluster), + measure=measure, direction=direction, + gene_names=X.columns[:X.shape[0]//2]) + assert np.all(result[cluster] == r) diff --git a/test/test_transform.py b/test/test_transform.py index f0d26f31..58168cbd 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -33,17 +33,13 @@ def test_log_transform(): Y=Y, transform=scprep.transform.log, base=2) Y = np.log2(X + 5) - assert_warns_message( - RuntimeWarning, - "log transform on sparse data requires pseudocount = 1", - scprep.transform.log, - data=sparse.csr_matrix(X), base=2, pseudocount=5) - assert_warns_message( - RuntimeWarning, - "log transform on sparse data requires pseudocount = 1", - scprep.transform.log, - data=pd.SparseDataFrame(X, default_fill_value=0.0), - base=2, pseudocount=5) + def test_fun(X): + assert_warns_message( + RuntimeWarning, + "log transform on sparse data requires pseudocount = 1", + scprep.transform.log, + data=X, base=2, pseudocount=5) + matrix.test_sparse_matrix_types(X, test_fun) matrix.test_dense_matrix_types( X, utils.assert_transform_equivalent, Y=Y, transform=scprep.transform.log, diff --git a/test/test_utils.py b/test/test_utils.py index d3ab0da3..e5c1d55c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -66,7 +66,7 @@ def test_with_pkg_version_fail_major(): def test(): return True assert_raise_message(ImportError, - "scprep requires numpy>={0} (installed: {1}). " + "numpy>={0} is required (installed: {1}). " "Please upgrade it with e.g." 
" `pip install --user --upgrade numpy".format( major + 1, np.__version__), @@ -80,7 +80,7 @@ def test_with_pkg_version_fail_minor(): def test(): return True assert_raise_message(ImportError, - "scprep requires numpy>={0}.{1} (installed: {2}). " + "numpy>={0}.{1} is required (installed: {2}). " "Please upgrade it with e.g." " `pip install --user --upgrade numpy".format( major, minor + 1, np.__version__), @@ -122,6 +122,8 @@ def test_combine_batches(): assert np.all(Y.index == Y2.index) assert np.all(sample_labels == np.concatenate( [np.repeat(0, X.shape[0]), np.repeat(1, X.shape[0] // 2)])) + assert np.all(sample_labels.index == Y2.index) + assert sample_labels.name == 'sample_labels' Y2, sample_labels = scprep.utils.combine_batches( [X, scprep.select.select_rows( X, idx=np.arange(X.shape[0] // 2))], @@ -131,6 +133,8 @@ def test_combine_batches(): assert np.all(np.core.defchararray.add( "_", sample_labels.astype(str)) == np.array( [i[-2:] for i in Y2.index], dtype=str)) + assert np.all(sample_labels.index == Y2.index) + assert sample_labels.name == 'sample_labels' transform = lambda X: scprep.utils.combine_batches( [X, scprep.select.select_rows(X, idx=np.arange(X.shape[0] // 2))], batch_labels=[0, 1])[0] @@ -141,6 +145,25 @@ def test_combine_batches(): Y=Y, transform=transform, check=utils.assert_all_equal) + def test_fun(X): + Y, sample_labels = scprep.utils.combine_batches( + [X, scprep.select.select_rows(X, idx=np.arange(X.shape[0] // 2))], + batch_labels=[0, 1]) + assert np.all(sample_labels.index == Y.index) + assert sample_labels.name == 'sample_labels' + matrix.test_pandas_matrix_types( + X, + test_fun) + + +def test_combine_batches_rangeindex(): + X = data.load_10X() + X = X.reset_index(drop=True) + Y = X.iloc[:X.shape[0] // 2] + data_combined, labels = scprep.utils.combine_batches( + [X, Y], ['x', 'y']) + assert isinstance(data_combined.index, pd.RangeIndex) + assert np.all(data_combined.columns == X.columns) def test_combine_batches_uncommon_genes(): @@ -161,14 +184,14 @@ def test_combine_batches_errors(): "append_to_cell_names only valid for pd.DataFrame input. " "Got coo_matrix", scprep.utils.combine_batches, - [X.to_coo(), X.iloc[:X.shape[0] // 2].to_coo()], + [X.sparse.to_coo(), X.iloc[:X.shape[0] // 2].sparse.to_coo()], batch_labels=[0, 1], append_to_cell_names=True) assert_raise_message( TypeError, - "Expected data all of the same class. Got SparseDataFrame, coo_matrix", + "Expected data all of the same class. Got DataFrame, coo_matrix", scprep.utils.combine_batches, - [X, X.iloc[:X.shape[0] // 2].to_coo()], + [X, X.iloc[:X.shape[0] // 2].sparse.to_coo()], batch_labels=[0, 1]) assert_raise_message( ValueError, @@ -217,12 +240,25 @@ def test_fun(X): matrix.test_all_matrix_types(X, test_fun) test_fun([X, np.matrix(X)]) + + +def test_toarray_string_error(): assert_raise_message(TypeError, "Expected array-like. 
Got ", scprep.utils.toarray, "hello") +def test_toarray_vector(): + X = data.generate_positive_sparse_matrix(shape=(50,)) + + def test_fun(X): + assert isinstance(scprep.utils.toarray(X), np.ndarray) + matrix.test_matrix_types(X, + test_fun, + matrix._pandas_vector_types) + + def test_toarray_list_of_strings(): X = ['hello', 'world', [1, 2, 3]] X = scprep.utils.toarray(X) @@ -271,6 +307,50 @@ def test_matrix_sum(): 5) +def test_matrix_std(): + X = data.generate_positive_sparse_matrix(shape=(50, 100)) + stds = np.array(X.std(0)).flatten() + matrix.test_all_matrix_types(X, utils.assert_transform_equals, Y=stds, + transform=scprep.utils.matrix_std, axis=0, + check=utils.assert_all_close) + matrix.test_numpy_matrix(X, utils.assert_transform_equals, Y=stds, + transform=scprep.utils.matrix_std, axis=0, + check=utils.assert_all_close) + + stds = np.array(X.std(1)).flatten() + matrix.test_all_matrix_types(X, utils.assert_transform_equals, Y=stds, + transform=scprep.utils.matrix_std, axis=1, + check=utils.assert_all_close) + matrix.test_numpy_matrix(X, utils.assert_transform_equals, Y=stds, + transform=scprep.utils.matrix_std, axis=1, + check=utils.assert_all_close) + + stds = np.array(X.std(None)).flatten() + matrix.test_all_matrix_types(X, utils.assert_transform_equals, Y=stds, + transform=scprep.utils.matrix_std, axis=None, + check=utils.assert_all_close) + matrix.test_numpy_matrix(X, utils.assert_transform_equals, Y=stds, + transform=scprep.utils.matrix_std, axis=None, + check=utils.assert_all_close) + + X_df = pd.DataFrame(X, index=np.arange(X.shape[0]).astype(str), + columns=np.arange(X.shape[1]).astype(str)) + def test_fun(X): + x = scprep.utils.matrix_std(X, axis=0) + assert x.name == 'std' + assert np.all(x.index == X_df.columns) + x = scprep.utils.matrix_std(X, axis=1) + assert x.name == 'std' + assert np.all(x.index == X_df.index) + matrix.test_pandas_matrix_types( + X_df, test_fun) + assert_raise_message(ValueError, + "Expected axis in [0, 1, None]. Got 5", + scprep.utils.matrix_std, + data, + 5) + + def test_matrix_elementwise_multiply_row(): X = data.generate_positive_sparse_matrix(shape=(50, 100)) x = X[:, 0] + 1 @@ -367,33 +447,89 @@ def test_matrix_elementwise_multiply_invalid_axis(): def test_deprecated(): X = data.load_10X() - assert_warns_message(FutureWarning, + assert_raise_message(RuntimeError, "`scprep.utils.select_cols` is deprecated. Use " "`scprep.select.select_cols` instead.", scprep.utils.select_cols, X, [1, 2, 3]) - assert_warns_message(FutureWarning, + assert_raise_message(RuntimeError, "`scprep.utils.select_rows` is deprecated. Use " "`scprep.select.select_rows` instead.", scprep.utils.select_rows, X, [1, 2, 3]) - assert_warns_message(FutureWarning, + assert_raise_message(RuntimeError, "`scprep.utils.get_gene_set` is deprecated. Use " "`scprep.select.get_gene_set` instead.", scprep.utils.get_gene_set, X, starts_with="D") - assert_warns_message(FutureWarning, + assert_raise_message(RuntimeError, "`scprep.utils.get_cell_set` is deprecated. Use " "`scprep.select.get_cell_set` instead.", scprep.utils.get_cell_set, X, starts_with="A") - assert_warns_message(FutureWarning, + assert_raise_message(RuntimeError, "`scprep.utils.subsample` is deprecated. 
Use " "`scprep.select.subsample` instead.", scprep.utils.subsample, X, n=10) + + +def test_is_sparse_dataframe(): + X = data.load_10X(sparse=False) + Y = X.astype(pd.SparseDtype(float, fill_value=0.0)) + assert scprep.utils.is_sparse_dataframe(Y) + def test_fun(X): + assert not scprep.utils.is_sparse_dataframe(X) + matrix.test_matrix_types( + X, + test_fun, + matrix._scipy_matrix_types + + matrix._numpy_matrix_types + + matrix._pandas_dense_matrix_types + + [matrix.SparseDataFrame_deprecated] + ) + + +def test_SparseDataFrame(): + X = data.load_10X(sparse=False) + Y = X.astype(pd.SparseDtype(float, fill_value=0.0)) + index = X.index + columns = X.columns + def test_fun(X): + X = scprep.utils.SparseDataFrame(X, index=index, columns=columns) + utils.assert_matrix_class_equivalent(X, Y) + matrix.test_all_matrix_types( + X, + test_fun + ) + matrix.test_pandas_matrix_types( + X, + utils.assert_transform_equivalent, + Y=Y, + transform=scprep.utils.SparseDataFrame + ) + + +def test_is_sparse_series(): + X = data.load_10X(sparse=True) + assert scprep.utils.is_sparse_series(X[X.columns[0]]) + def test_fun(X): + if isinstance(X, pd.SparseDataFrame): + x = X[X.columns[0]] + else: + x = scprep.select.select_cols(X, idx=0) + assert not scprep.utils.is_sparse_series(x) + matrix.test_matrix_types( + X.to_numpy(), + test_fun, + matrix._scipy_matrix_types + + matrix._numpy_matrix_types + + matrix._pandas_dense_matrix_types + + [matrix.SparseDataFrame_deprecated] + ) + \ No newline at end of file diff --git a/test/tools/__init__.py b/test/tools/__init__.py index 9b2e90bd..610d72cb 100644 --- a/test/tools/__init__.py +++ b/test/tools/__init__.py @@ -1,2 +1,2 @@ import matplotlib as mpl -mpl.use("Agg") +mpl.use("agg") diff --git a/test/tools/matrix.py b/test/tools/matrix.py index ec741ddd..e2251f58 100644 --- a/test/tools/matrix.py +++ b/test/tools/matrix.py @@ -5,6 +5,37 @@ from functools import partial +def _ignore_pandas_sparse_warning(): + warnings.filterwarnings( + "ignore", + category=FutureWarning, + message="SparseSeries") + warnings.filterwarnings( + "ignore", + category=FutureWarning, + message="SparseDataFrame") + warnings.filterwarnings( + "error", + category=pd.errors.PerformanceWarning) + + +def _reset_warnings(): + warnings.filterwarnings( + "error", + category=FutureWarning, + message="SparseSeries") + warnings.filterwarnings( + "error", + category=FutureWarning, + message="SparseDataFrame") + warnings.filterwarnings( + "error", + category=pd.errors.PerformanceWarning) + + +_reset_warnings() + + def _no_warning_dia_matrix(*args, **kwargs): """Helper function to silently create diagonal matrix""" with warnings.catch_warnings(): @@ -15,7 +46,24 @@ def _no_warning_dia_matrix(*args, **kwargs): " diagonals is inefficient") return sparse.dia_matrix(*args, **kwargs) -SparseDataFrame = partial(pd.SparseDataFrame, default_fill_value=0.0) +def SparseDataFrame_deprecated(X, default_fill_value=0.0): + return pd.SparseDataFrame(X, default_fill_value=default_fill_value) + +def SparseSeries(X, default_fill_value=0.0): + return pd.Series(X).astype(pd.SparseDtype(float, fill_value=default_fill_value)) + +def SparseSeries_deprecated(X, default_fill_value=0.0): + return pd.SparseSeries(X, fill_value=default_fill_value) + + +def SparseDataFrame(X, default_fill_value=0.0): + if sparse.issparse(X): + X = pd.DataFrame.sparse.from_spmatrix(X) + X.sparse.fill_value = default_fill_value + elif isinstance(X, pd.SparseDataFrame) or not isinstance(X, pd.DataFrame): + X = pd.DataFrame(X) + return 
X.astype(pd.SparseDtype(float, fill_value=default_fill_value)) + _scipy_matrix_types = [ sparse.csr_matrix, @@ -36,22 +84,30 @@ def _no_warning_dia_matrix(*args, **kwargs): _pandas_sparse_matrix_types = [ SparseDataFrame, + SparseDataFrame_deprecated, ] -_pandas_matrix_types = [ - pd.DataFrame, - SparseDataFrame, +_pandas_vector_types = [ + pd.Series, + SparseSeries, + SparseSeries_deprecated ] +_pandas_matrix_types = _pandas_dense_matrix_types + _pandas_sparse_matrix_types + _indexable_matrix_types = [ sparse.csr_matrix, sparse.csc_matrix, sparse.lil_matrix, - sparse.dok_matrix, - np.array, - pd.DataFrame, - SparseDataFrame -] + sparse.dok_matrix +] + _numpy_matrix_types + _pandas_matrix_types + + +def _typename(X): + if isinstance(X, pd.DataFrame) and not isinstance(X, pd.SparseDataFrame) and hasattr(X, "sparse"): + return "DataFrame[SparseArray]" + else: + return type(X).__name__ def test_matrix_types(X, test_fun, matrix_types, *args, **kwargs): @@ -66,13 +122,17 @@ def test_matrix_types(X, test_fun, matrix_types, *args, **kwargs): **kwargs : keyword arguments for test_fun """ for fun in matrix_types: + if fun is SparseDataFrame_deprecated or fun is SparseSeries_deprecated: + _ignore_pandas_sparse_warning() Y = fun(X.copy()) try: test_fun(Y, *args, **kwargs) except Exception as e: raise RuntimeError("{} with {} input to {}\n{}".format( - type(e).__name__, type(Y).__name__, test_fun.__name__, + type(e).__name__, _typename(Y), test_fun.__name__, str(e))) + finally: + _reset_warnings() def test_dense_matrix_types(X, test_fun, *args, **kwargs): diff --git a/test/tools/utils.py b/test/tools/utils.py index 092a2cf9..25671c2e 100644 --- a/test/tools/utils.py +++ b/test/tools/utils.py @@ -3,6 +3,7 @@ import pandas as pd from nose.tools import assert_raises from scprep.utils import toarray +from . 
+from . import matrix
 
 
 def assert_all_equal(X, Y):
@@ -40,15 +41,10 @@ def assert_transform_equals(X, Y, transform, check=assert_all_equal, **kwargs):
     -------
     Y2 : returned value of transform(X, **kwargs)
     """
-    try:
-        Y2 = transform(X, **kwargs)
-    except Exception as e:
-        raise RuntimeError("{} with {} input to {}\n{}".format(
-            type(e).__name__, type(X).__name__, transform,
-            str(e)))
+    Y2 = transform(X, **kwargs)
     check(Y, Y2), "{} failed on {}".format(
         transform,
-        type(X).__name__)
+        matrix._typename(X))
     return Y2
 
 
@@ -89,7 +85,7 @@ def assert_transform_equivalent(X, Y, transform, check=assert_all_equal,
     Y2 = assert_transform_equals(X, Y, transform, check=check, **kwargs)
     assert assert_matrix_class_equivalent(X, Y2), \
         "{} produced inconsistent matrix output".format(
-            type(X).__name__)
+            matrix._typename(X))
 
 
 def assert_transform_raises(X, transform, exception=ValueError, **kwargs):
@@ -104,6 +100,18 @@ def assert_transform_raises(X, transform, exception=ValueError, **kwargs):
     assert_raises(exception, transform, X, **kwargs)
 
 
+def _is_sparse_dataframe(X):
+    return isinstance(X, pd.SparseDataFrame) or \
+        (isinstance(X, pd.DataFrame) and hasattr(X, "sparse"))
+
+
+def _sparse_dataframe_density(X):
+    try:
+        return X.sparse.density
+    except AttributeError:
+        return X.density
+
+
 def assert_matrix_class_equivalent(X, Y):
     """Check the format of X and Y are the same
@@ -117,11 +125,13 @@ def assert_matrix_class_equivalent(X, Y):
     if sparse.issparse(X):
         assert sparse.issparse(Y)
         assert X.tocoo().nnz == Y.tocoo().nnz
+    elif isinstance(X, pd.SparseDataFrame):
+        assert _is_sparse_dataframe(Y)
     else:
-        assert type(X) == type(Y)
+        assert type(X) == type(Y), (type(X), type(Y))
+    if _is_sparse_dataframe(X):
+        assert _sparse_dataframe_density(X) == _sparse_dataframe_density(Y)
     if isinstance(X, pd.DataFrame):
         assert np.all(X.columns == Y.columns)
         assert np.all(X.index == Y.index)
-    if isinstance(X, pd.SparseDataFrame) or isinstance(Y, pd.SparseDataFrame):
-        assert X.density == Y.density
     return True
diff --git a/travis_setup.R b/travis_setup.R
index 373614b1..a71289e6 100644
--- a/travis_setup.R
+++ b/travis_setup.R
@@ -3,4 +3,3 @@ if (!require("remotes")) install.packages("remotes", quietly=TRUE)
 remotes::update_packages(upgrade="always")
 if (!require("BiocManager")) install.packages("BiocManager", quietly=TRUE)
 BiocManager::install(update=TRUE, ask=FALSE)
-if (!require("splatter")) BiocManager::install("splatter", quietly=TRUE)
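Note on the sparse-matrix changes throughout this patch: they track the removal of `pd.SparseDataFrame` in pandas 1.0, replacing it with an ordinary `pd.DataFrame` holding SparseArray columns. A minimal standalone sketch of the replacement idioms adopted here (`DataFrame.sparse.from_spmatrix`, `SparseDtype`, and the `.sparse` accessor); the toy data is illustrative only, not from the test suite:

    import numpy as np
    import pandas as pd
    from scipy import sparse

    # A scipy sparse matrix becomes a DataFrame of SparseArray columns.
    X = sparse.random(5, 3, density=0.4, format="csr")
    df = pd.DataFrame.sparse.from_spmatrix(X)

    # A dense DataFrame is converted column-wise with SparseDtype,
    # mirroring the new SparseDataFrame helper in test/tools/matrix.py.
    dense = pd.DataFrame(np.eye(3))
    df2 = dense.astype(pd.SparseDtype(float, fill_value=0.0))

    # The .sparse accessor replaces the old SparseDataFrame attributes,
    # as used by _sparse_dataframe_density in test/tools/utils.py.
    print(df2.sparse.density)  # fraction of stored (non-fill) entries
    coo = df2.sparse.to_coo()  # back to a scipy coo_matrix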