Skip to content

Commit

Permalink
Merge pull request hiclib#14 from NelleV/select_partial_matrix
Browse files Browse the repository at this point in the history
ENH added utility function to select subsample of the matrix
  • Loading branch information
NelleV committed Mar 16, 2016
2 parents 470e355 + 846d371 commit 6e1ecec
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 11 deletions.
7 changes: 1 addition & 6 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
.. -*- mode: rst -*-
|Travis|_ |AppVeyor|_ |Coveralls|_
|Travis|_ |Coveralls|_

.. |Travis| image:: https://api.travis-ci.org/hiclib/iced.png?branch=master
.. _Travis: https://travis-ci.org/hiclib/iced

.. |AppVeyor| image::
https://ci.appveyor.com/api/projects/status/github/hiclib/iced?branch=master&svg=true
.. _AppVeyor:
https://ci.appveyor.com/project/sklearn-ci/scikit-learn/history

.. |Coveralls| image::
https://coveralls.io/repos/hiclib/iced/badge.svg?branch=master&service=github
.. _Coveralls: https://coveralls.io/r/hiclib/iced
Expand Down
24 changes: 22 additions & 2 deletions doc/modules/classes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,13 @@ Functions

.. _filter_ref:


:mod:`iced.datasets`: Datasets
===============================================

.. automodule:: iced.datasets
:no-members:
:no-inherited-members:


Functions
---------
.. currentmodule:: iced
Expand All @@ -72,3 +70,25 @@ Functions
.. _datasets_ref:


:mod:`iced.utils`: Utils
===============================================

.. automodule:: iced.utils
:no-members:
:no-inherited-members:


Functions
---------
.. currentmodule:: iced

.. autosummary::
:toctree: generated/
:template: function.rst


utils.get_intra_mask
utils.get_inter_mask
utils.extract_sub_contact_map

.. _utils_ref:
9 changes: 9 additions & 0 deletions doc/modules/utils.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
.. _utils:

==============
Utilities
==============

The :mod:`utils` submodule contains utility functions.

.. currentmodule:: iced.utils
26 changes: 26 additions & 0 deletions examples/utils/plot_extract_sample_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Extracting parts of a contact map.

This example shows how to extract the contact counts associated with some
chromosomes of a contact map. Here, we extract chromosomes 1, 4 and 5 of the
budding yeast contact map.
"""
import matplotlib.pyplot as plt
from matplotlib import colors

from iced import datasets
from iced.utils import extract_sub_contact_map

# Loading a sample dataset
counts, lengths = datasets.load_sample_yeast()

# Chromosome ids are 0-based positions in ``lengths``: [0, 3, 4] selects
# chromosomes I, IV and V.
sub_counts, sub_lengths = extract_sub_contact_map(counts, lengths, [0, 3, 4])

fig, ax = plt.subplots()
m = ax.matshow(sub_counts, cmap="Blues", norm=colors.SymLogNorm(1),
               origin="lower",
               extent=(0, len(sub_counts), 0, len(sub_counts)))

# Draw the chromosome boundaries of the *extracted* map: they are given by
# the lengths of the selected chromosomes, not by those of the full genome.
for boundary in sub_lengths.cumsum():
    ax.axhline(boundary, linewidth=1, color="#000000")
    ax.axvline(boundary, linewidth=1, color="#000000")

cb = fig.colorbar(m)
ax.set_title("Chromosomes I, IV and V of yeast")
3 changes: 2 additions & 1 deletion iced/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def load_sample_yeast():
Load and return a sample of S. cerevisiae contact count matrix from duan
et al, Nature, 2009
Returns:
Returns
-------
counts, lengths:
tuple of two elements, the first a contact count matrix, the
second an ndarray containing the lengths of the chromosomes.
Expand Down
4 changes: 3 additions & 1 deletion iced/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

def ICE_normalization(X, SS=None, max_iter=3000, eps=1e-4, copy=True,
norm='l1', verbose=0, output_bias=False,
total_counts=None):
total_counts=None, counts_profile=None):
"""
ICE normalization
Expand Down Expand Up @@ -89,6 +89,8 @@ def ICE_normalization(X, SS=None, max_iter=3000, eps=1e-4, copy=True,
raise NotImplementedError

dbias = sum_ds.reshape((m, 1))
if counts_profile is not None:
dbias /= counts_profile[:, np.newaxis]
# To avoid numerical instabilities
dbias /= dbias[dbias != 0].mean()

Expand Down
81 changes: 81 additions & 0 deletions iced/utils/_genome.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from .validation import is_symetric_or_tri


def get_intra_mask(lengths):
Expand Down Expand Up @@ -71,6 +72,86 @@ def get_genomic_distances(lengths):
return dis.astype(int)


def extract_sub_contact_map(counts, lengths, chromosomes):
    """
    Extract the contact map associated to a given list of chromosomes

    Parameters
    ----------
    counts : ndarray (n, n)
        Contact count matrix. ``n`` must be equal to ``lengths.sum()``.

    lengths : ndarray (L, )
        Size (number of rows/columns of ``counts``) of each of the ``L``
        chromosomes, in the order in which they appear in ``counts``.

    chromosomes : list of ids
        0-based positions in ``lengths`` of the chromosomes to keep. The
        ids are sorted and duplicates are dropped, so the chromosomes always
        appear in the output in their original relative order.

    Returns
    -------
    sub_counts, sub_lengths : (ndarray, ndarray)
        ``sub_counts`` is a newly allocated float array containing the
        blocks of ``counts`` for every pair of selected chromosomes;
        ``sub_lengths`` contains the lengths of the selected chromosomes.

    Raises
    ------
    ValueError
        If ``chromosomes`` is empty or contains an id outside of
        ``[0, len(lengths))``, or if ``lengths`` does not sum to the size of
        ``counts``.

    Examples
    --------
    >>> from iced import datasets
    >>> from iced.utils import extract_sub_contact_map
    >>> counts, lengths = datasets.load_sample_yeast()
    >>> scounts, slengths = extract_sub_contact_map(counts, lengths, [0, 2])
    >>> print("%d %d" % (len(counts), len(scounts)))
    ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    350 56
    """
    # np.unique returns a new, sorted, duplicate-free array: the caller's
    # sequence is never modified, and a repeated id cannot create blocks that
    # would otherwise be left filled with zeros.
    chromosomes = np.unique(chromosomes)
    if chromosomes.size == 0:
        raise ValueError(
            "No chromosomes provided. Possible values are"
            " %s" % " ".join("%s" % i for i in np.arange(len(lengths))))
    # Negative ids are rejected too: they would index ``lengths`` from the
    # end while never matching any chromosome position.
    if chromosomes.min() < 0 or chromosomes.max() >= len(lengths):
        raise ValueError(
            "The chromosomes provided are not compatible with the "
            "lengths array. Possible values are"
            " %s" % " ".join("%s" % i for i in np.arange(len(lengths))))
    if lengths.sum() != counts.shape[0]:
        raise ValueError(
            "The lengths provided is incompatible with the counts matrix "
            "shape. The total lengths is %d while the contact count matrix "
            "is %d" % (lengths.sum(), counts.shape[0]))

    # NOTE(review): helper defined elsewhere in the package; presumably it
    # validates that the matrix is symmetric or triangular -- confirm.
    is_symetric_or_tri(counts)

    new_lengths = lengths[chromosomes]

    # First/last (exclusive) index of every chromosome, both in the original
    # matrix and in the extracted one. Computed once, outside of the loops.
    old_ends = lengths.cumsum()
    old_begins = old_ends - lengths
    new_ends = new_lengths.cumsum()
    new_begins = new_ends - new_lengths

    new_counts = np.zeros((new_lengths.sum(), new_lengths.sum()))
    # Copy the block of every pair of selected chromosomes to its new
    # position.
    for new_i, i in enumerate(chromosomes):
        rows = slice(old_begins[i], old_ends[i])
        new_rows = slice(new_begins[new_i], new_ends[new_i])
        for new_j, j in enumerate(chromosomes):
            new_counts[new_rows, new_begins[new_j]:new_ends[new_j]] = \
                counts[rows, old_begins[j]:old_ends[j]]

    return new_counts, new_lengths


def undersample_per_chr(X, lengths):
"""
Undersample matrix to chromosomes
Expand Down
1 change: 1 addition & 0 deletions iced/utils/_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

13 changes: 12 additions & 1 deletion iced/utils/tests/test_genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from iced.utils._genome import get_inter_mask
from iced.utils._genome import _change_lengths_resolution
from iced.utils._genome import undersample_per_chr
from iced.utils._genome import extract_sub_contact_map


def test_get_intra_mask():
Expand All @@ -28,7 +29,7 @@ def test_get_inter_mask():
true_mask = np.zeros((10, 10))
true_mask[:5, :5] = 1
true_mask[5:, 5:] = 1
assert_array_equal(mask, true_mask.astype(bool) == False)
assert_array_equal(mask, np.invert(true_mask.astype(bool)))


def test_undersample_per_chr():
Expand All @@ -41,3 +42,13 @@ def test_undersample_per_chr():
undersampled_X_true = np.array([[1, 0],
[0, 0.5]])
assert_array_equal(undersampled_X_true, undersampled_X)


def test_return_sample():
    """Extracting a single chromosome returns its diagonal block."""
    # Fixed seed so that a failure can be reproduced.
    random_state = np.random.RandomState(42)
    lengths = np.array([50, 75])
    n = lengths.sum()
    X = random_state.randint(0, 50, (n, n))
    X = np.triu(X)

    # First chromosome: top-left block.
    sub_X, sub_lengths = extract_sub_contact_map(X, lengths, [0])
    assert_array_equal(X[:lengths[0], :lengths[0]],
                       sub_X)
    assert_array_equal(lengths[:1], sub_lengths)

    # Second chromosome: bottom-right block, moved to the origin.
    sub_X, sub_lengths = extract_sub_contact_map(X, lengths, [1])
    assert_array_equal(X[lengths[0]:, lengths[0]:],
                       sub_X)
    assert_array_equal(lengths[1:], sub_lengths)

0 comments on commit 6e1ecec

Please sign in to comment.