Skip to content

Commit

Permalink
Update dosc src and readme
Browse files Browse the repository at this point in the history
  • Loading branch information
andyzhangstat committed May 21, 2024
1 parent 9b288a9 commit 7fd5fc5
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 167 deletions.
39 changes: 20 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ There are two major functions in this package:

`sparse_logistic_svd_coord_2_way(dat, lambdas=np.logspace(-2, 2, num=10), etas=np.logspace(-2, 2, num=10), k=2, quiet=True,
max_iters=100, conv_crit=1e-5, randstart=False,
normalize=False, start_A=None, start_B=None, start_mu=None)`: This function performs Twao-way Sparse Logistic Singular Value Decomposition (SLSVD) using Majorization-Minimization and Coordinate Descent algorithms.
normalize=False, start_A=None, start_B=None, start_mu=None)`: This function performs Two-way Sparse Logistic Singular Value Decomposition (SLSVD) using Majorization-Minimization and Coordinate Descent algorithms.



Expand All @@ -44,6 +44,7 @@ There are two major functions in this package:
- `random_seed` (integer): Random seed to ensure reproducibility.
- `dat`: Input data matrix.
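- `lambdas`: Array of regularization parameters (soft-thresholding penalties applied to the loading matrix `B`).
- `etas`: Array of regularization parameters (soft-thresholding penalties applied to the score matrix `A`).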
- `k`: Number of components.
- `quiet`: Boolean to suppress iteration printouts.
- `max_iters`: Maximum number of iterations.
Expand Down Expand Up @@ -118,15 +119,15 @@ poetry run pytest --cov-branch --cov=SLSVD2 --cov-report html

## Usage

Use this package to find the optimized score and loading matrices of sparse logistic Singular Value Decomposition. In the following example, we generate a simulated data set with defined size first. By the Majorization-Minimization and Coordinate Descent algorithms, we obtain the optimized score and loading matrices. Finally, we visualize both the simulated data and fitted loadings in one figure.
Use this package to find the optimized score and loading matrices of the two-way sparse logistic Singular Value Decomposition. In the following example, we first generate a simulated data set of a given size. Using the Majorization-Minimization and Coordinate Descent algorithms, we then obtain the optimized score and loading matrices. Finally, we visualize both the simulated data and the fitted loadings in one figure.

Example usage:

```python
>>> from slsvd2.data_generation import generate_data_2_way
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> bin_mat, loadings, scores, diagonal=generate_data(n=200, d=100, rank=2, random_seed=123)
>>> bin_mat, loadings, scores, diagonal=generate_data_2_way(n=200, d=100, rank=2, random_seed=123)

# Check shapes
>>> print("Binary Matrix Shape:", bin_mat.shape)
Expand All @@ -151,8 +152,8 @@ Loadings Shape: (100, 2)
Scores Shape: (200, 2)
Dot Product of Scores:
array([[195.4146256 , 2.67535881],
[ 2.67535881, 200.14653178]])
array([[1., 0.],
[0., 1.]])
Dot Product of Loadings:
array([[1., 0.],
Expand All @@ -162,19 +163,20 @@ array([[1., 0.],


```python
>>> plt.figure(figsize=(8, 12))
>>> cmap = plt.cm.get_cmap('viridis', 2)

>>> plt.figure(figsize=(6, 9))
>>> colors = ['cyan', 'magenta']
>>> cmap = plt.matplotlib.colors.ListedColormap(colors, name='custom_cmap', N=2)
>>> plt.imshow(bin_mat, cmap=cmap, interpolation='nearest')

>>> cbar = plt.colorbar(ticks=[0.25, 0.75])
>>> cbar.ax.set_yticklabels(['0', '1'])

>>> plt.title('Heatmap of Binary Matrix')
>>> plt.title('Heatmap of Simulated Binary Matrix')
>>> plt.xlabel('Feature')
>>> plt.ylabel('Sample')

>>> plt.tight_layout()

>>> plt.show()

```


Expand All @@ -187,7 +189,7 @@ array([[1., 0.],
>>> import numpy as np

>>> # Perform Sparse Logistic SVD
>>> mu, A, B, zeros, BICs = sparse_logistic_svd_coord(bin_mat, lambdas=np.logspace(-2, 1, num=10), k=2)
>>> mu, A, B, S, zeros, BICs = sparse_logistic_svd_coord_2_way(bin_mat, lambdas=np.logspace(-2, 1, num=10), etas=np.logspace(-2, 1, num=10), k=2)

>>> # Calculate mean of mu
>>> print("Mean of mu:", np.mean(mu))
Expand All @@ -203,15 +205,15 @@ array([[1., 0.],


```
Mean of mu: 0.052624279581212116
Mean of mu: 0.07933574417007386
Dot Product of Scores:
array([[7672.61634966, 277.23466856],
[ 277.23466856, 3986.24113586]])
array([[1. , 0.02601576],
[0.02601576, 1. ]])
Dot Product of Loadings:
array([[1. , 0.00111067],
[0.00111067, 1. ]])
array([[1. , 0.03334437],
[0.03334437, 1. ]])
```

Expand All @@ -221,8 +223,7 @@ array([[1. , 0.00111067],
## Documentation


Online documentation is available [readthedocs](https://slsvd.readthedocs.io/en/latest/?badge=latest).

Online documentation is available on [Read the Docs](https://slsvd2.readthedocs.io/en/latest/?badge=latest).
Published on [TestPyPI](https://test.pypi.org/project/slsvd2/) and [PyPI](https://pypi.org/project/slsvd2/).

## Contributors
Expand Down
8 changes: 2 additions & 6 deletions docs/example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,8 @@
}
],
"source": [
"plt.figure(figsize=(6, 9)) # Adjust the width and height as needed\n",
"plt.figure(figsize=(6, 9)) \n",
"\n",
"# Create a custom colormap using cyan and magenta\n",
"colors = ['cyan', 'magenta']\n",
"cmap = plt.matplotlib.colors.ListedColormap(colors, name='custom_cmap', N=2)\n",
"\n",
Expand All @@ -250,11 +249,8 @@
"plt.xlabel('Feature')\n",
"plt.ylabel('Sample')\n",
"\n",
"# Use tight layout to reduce white space\n",
"plt.tight_layout()\n",
"\n",
"#plt.savefig('heatmap.png', format='png', dpi=300)\n",
"# Show the plot\n",
"plt.show()\n"
]
},
Expand Down Expand Up @@ -389,7 +385,7 @@
"metadata": {},
"outputs": [],
"source": [
"mu, A, B,S, zeros, BICs=sparse_logistic_svd_coord_2_way(dat=bin_mat, lambdas=thelam, etas=theeta, k=2) "
"mu, A, B, S, zeros, BICs=sparse_logistic_svd_coord_2_way(dat=bin_mat, lambdas=thelam, etas=theeta, k=2) "
]
},
{
Expand Down
142 changes: 0 additions & 142 deletions src/slsvd2/slsvd2.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,148 +25,6 @@ def inv_logit_mat(x, min_val=0, max_val=1):



# def sparse_logistic_svd_coord_2_way(dat, lambdas=np.logspace(-2, 2, num=10), etas=np.logspace(-2, 2, num=10), k=2, quiet=True,
# max_iters=100, conv_crit=1e-5, randstart=False,
# normalize=False, start_A=None, start_B=None, start_mu=None):
# """
# Sparse Logistic SVD biclustering with Coordinate Descent.

# Parameters:
# dat : ndarray
# Input data matrix.
# lambdas : array_like, optional
# Array of regularization parameters.
# etas : array_like, optional
# Array of regularization parameters.
# k : int, optional
# Number of components.
# quiet : bool, optional
# If True, suppresses iteration printouts.
# max_iters : int, optional
# Maximum number of iterations.
# conv_crit : float, optional
# Convergence criterion.
# randstart : bool, optional
# If True, uses random initialization.
# normalize : bool, optional
# If True, normalizes the components.
# start_A : ndarray, optional
# Initial value for matrix A.
# start_B : ndarray, optional
# Initial value for matrix B.
# start_mu : ndarray, optional
# Initial value for mean vector.

# Returns:
# tuple
# Tuple containing mu, A, B, zeros_mat, BICs.
# - mu: The mean vector.
# - A: The matrix A.
# - B: The matrix B.
# - zeros_mat: Matrix indicating the number of zeros in each component.
# - BICs: Matrix containing the Bayesian Information Criterion for each component.
# """

# q = 2 * dat - 1
# q[np.isnan(q)] = 0

# n, d = dat.shape

# if not randstart:
# mu = np.mean(q)
# udv = svd((q - np.mean(q)).T, full_matrices=False)
# B = udv[0][:, :k]
# A = udv[2][:k, :].T
# S = np.diag(udv[1][:k])
# else:
# mu = np.random.randn(d)
# A = np.random.uniform(-1, 1, size=(n, k))
# B = np.random.uniform(-1, 1, size=(d, k))
# S = np.diag(np.ones(k))

# if start_B is not None:
# B = start_B / np.sqrt(np.sum(start_B**2, axis=0))

# if start_A is not None:
# A = start_A / np.sqrt(np.sum(start_A**2, axis=0))

# if start_mu is not None:
# mu = start_mu

# BICs = np.zeros((len(lambdas) * len(etas), k))
# zeros_mat = np.zeros((len(lambdas) * len(etas), k))
# iters = np.zeros((len(lambdas) * len(etas), k))

# theta = mu + (A @ S @ B.T)

# X = theta + 4 * q * (1 - inv_logit_mat(q * theta))
# Xcross = X - (A @ S @ B.T)
# mu = np.mean(Xcross)

# for m in range(k):
# last_A = A.copy()
# last_B = B.copy()

# theta = mu + (A @ S @ B.T)
# X = theta + 4 * q * (1 - inv_logit_mat(q * theta))
# Xm = X - (mu + A[:, np.arange(k) != m] @ np.diag(S[np.arange(k) != m, np.arange(k) != m]) @ B[:, np.arange(k) != m].T)

# idx = 0
# for lambda_val in lambdas:
# for eta_val in etas:
# for i in range(max_iters):
# if np.sum(B[:, m]**2) == 0:
# A[:, m] = Xm @ B[:, m]
# break
# if np.sum(A[:, m]**2) == 0:
# B[:, m] = Xm.T @ A[:, m]
# break

# A_lse = Xm @ B[:, m]
# A[:, m] = np.sign(A_lse) * np.maximum(0, np.abs(A_lse) - eta_val)
# S[m,m] = np.sqrt(np.sum(A[:, m]**2))
# A[:, m] = A[:, m] / np.sqrt(np.sum(A[:, m]**2))

# B_lse = Xm.T @ A[:, m]
# B[:, m] = np.sign(B_lse) * np.maximum(0, np.abs(B_lse) - lambda_val)
# S[m,m] = np.sqrt(np.sum(B[:, m]**2))
# B[:, m] = B[:, m] / np.sqrt(np.sum(B[:, m]**2))

# loglike = np.sum(np.log(inv_logit_mat(q * (mu + (A @ S @ B.T))))[~np.isnan(dat)])
# penalty = 0.25 * lambda_val * np.sum(np.abs(B[:, m])) + 0.25 * eta_val * np.sum(np.abs(A[:, m]))
# cur_loss = (-loglike + penalty) / np.sum(~np.isnan(dat))

# if not quiet:
# print(m, " ", np.round(-loglike, 4), " ", np.round(penalty, 4),
# " ", np.round(-loglike + penalty, 4))

# if i > 4:
# if (last_loss - cur_loss) / last_loss < conv_crit:
# break

# last_loss = cur_loss

# BICs[idx, m] = -2 * loglike + np.log(n * d) * (1 + np.count_nonzero(B[:, m]) + np.count_nonzero(A[:, m]))
# zeros_mat[idx, m] = np.count_nonzero(B[:, m]) + np.count_nonzero(A[:, m])
# iters[idx, m] = i
# idx += 1

# best_idx = np.argmin(BICs[:, m])
# best_lambda_idx, best_eta_idx = divmod(best_idx, len(etas))
# B[:, m] = B[:, best_lambda_idx * len(etas) + best_eta_idx]
# A[:, m] = A[:, best_lambda_idx * len(etas) + best_eta_idx]

# if normalize:
# A = A / np.sqrt(np.sum(A**2, axis=0))
# B = B / np.sqrt(np.sum(B**2, axis=0))

# return mu, A, B, S, zeros_mat, BICs






def sparse_logistic_svd_coord_2_way(dat, lambdas=np.logspace(-2, 2, num=10), etas=np.logspace(-2, 2, num=10), k=2, quiet=True,
max_iters=100, conv_crit=1e-5, randstart=False,
normalize=False, start_A=None, start_B=None, start_mu=None):
Expand Down

0 comments on commit 7fd5fc5

Please sign in to comment.