Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into katys/update-ruff-config
Browse files Browse the repository at this point in the history
  • Loading branch information
strixy16 committed Dec 17, 2024
2 parents 7dda5ed + 9b734be commit 5dbfc73
Show file tree
Hide file tree
Showing 14 changed files with 3,930 additions and 782 deletions.
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
# CHANGELOG


## v1.27.0 (2024-12-17)

### Features

- Add analysis functions ([#92](https://github.com/bhklab/readii/pull/92),
[`a57182c`](https://github.com/bhklab/readii/commit/a57182cb32f3bcd3600eaed53ad60ac145fe9d6f))

Includes correlation calculations and plotting those correlations as a heatmap and histogram

- **New Features** - Updated version number to 1.26.0 with new dependencies: `numpy`, `seaborn`, and
`pandas`. - Introduced a new module for analyzing READII outputs with several correlation
functions. - Added visualization functions for correlation data: heatmap and histogram. - New
validation function for DataFrame dimensions added. - Expanded platform support to include
`osx-64` and `win-64`.

- **Bug Fixes** - Enhanced error handling in correlation calculations and plot saving processes. -
Simplified exception handling in feature loading functions.

- **Documentation** - Improved docstrings for new functions and modules for better usability.

- **Chores** - Expanded linting configuration for broader coverage of Python files.


## v1.26.0 (2024-12-16)

### Features
Expand Down
1 change: 1 addition & 0 deletions config/ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ include = [
"src/readii/cli/**/*.py",
"src/readii/negative_controls_refactor/**.py",
"src/readii/io/**/**.py",
"src/readii/analyze/**.py"
]

# extend-exclude is used to exclude directories from the flake8 checks
Expand Down
9 changes: 4 additions & 5 deletions notebooks/nifti_writer_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -26,7 +26,6 @@
"import subprocess\n",
"import SimpleITK as sitk\n",
"import pandas as pd\n",
"import uuid\n",
"import random\n",
"import sys\n",
"from readii.utils import logger"
Expand All @@ -41,7 +40,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -65,7 +64,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -210,7 +209,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
"version": "3.12.8"
}
},
"nbformat": 4,
Expand Down
4,026 changes: 3,253 additions & 773 deletions pixi.lock

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "readii"
version = "1.26.0"
version = "1.27.0"
description = "A package to extract radiomic features!"
authors = [{ name = "Katy Scott", email = "[email protected]" }]

Expand All @@ -13,6 +13,9 @@ dependencies = [
"pydicom>=2.3.1",
"pyradiomics-bhklab>=3.1.4,<4",
"orcestra-downloader>=0.9.0,<1",
"numpy==1.26.4.*",
"seaborn>=0.13.2,<0.14",
"pandas>=2.2.3,<3"
]
requires-python = ">=3.10, <3.13"

Expand Down
2 changes: 1 addition & 1 deletion src/readii/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# read version from installed package
from importlib.metadata import version
__version__ = "1.26.0"
__version__ = "1.27.0"

17 changes: 17 additions & 0 deletions src/readii/analyze/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Module to perform analysis on READII outputs."""
from .correlation import (
getCrossCorrelationMatrix,
getFeatureCorrelations,
getHorizontalSelfCorrelations,
getVerticalSelfCorrelations,
)
from .plot_correlation import plotCorrelationHeatmap, plotCorrelationHistogram

# Public names exported by this package (all are imported above).
__all__ = [
    "getFeatureCorrelations",
    "getVerticalSelfCorrelations",
    "getHorizontalSelfCorrelations",
    "getCrossCorrelationMatrix",
    "plotCorrelationHeatmap",
    "plotCorrelationHistogram",
]
165 changes: 165 additions & 0 deletions src/readii/analyze/correlation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import pandas as pd

from readii.data.select import validateDataframeSubsetSelection
from readii.utils import logger


def getFeatureCorrelations(vertical_features:pd.DataFrame,
                           horizontal_features:pd.DataFrame,
                           method:str = "pearson",
                           vertical_feature_name:str = '_vertical',
                           horizontal_feature_name:str = '_horizontal') -> pd.DataFrame:
    """Calculate correlation between two sets of features.

    The two dataframes are joined column-wise on their (identical) index and a
    single correlation matrix is computed over all joined columns.

    Parameters
    ----------
    vertical_features : pd.DataFrame
        Dataframe containing features to calculate correlations with. Index must be the same as the index of the horizontal_features dataframe.
    horizontal_features : pd.DataFrame
        Dataframe containing features to calculate correlations with. Index must be the same as the index of the vertical_features dataframe.
    method : str
        Method to use for calculating correlations. Must be one of "pearson", "spearman", or "kendall". Default is "pearson".
    vertical_feature_name : str
        Name of the vertical features to use as suffix in correlation dataframe. A leading "_" is added if missing. Default is "_vertical".
    horizontal_feature_name : str
        Name of the horizontal features to use as suffix in correlation dataframe. A leading "_" is added if missing. Default is "_horizontal".

    Returns
    -------
    correlation_matrix : pd.DataFrame
        Dataframe containing correlation values. Vertical feature columns come first, followed by the horizontal feature columns.

    Raises
    ------
    TypeError
        If either features argument is not a pandas DataFrame.
    ValueError
        If either dataframe is empty, if `method` is not supported, or if the two dataframes do not share the same index.

    Notes
    -----
    The suffixes are passed to `DataFrame.join`, which only appends them to
    column names that appear in both dataframes; unique column names are kept as-is.
    """
    # Check that features are dataframes
    if not isinstance(vertical_features, pd.DataFrame):
        msg = "vertical_features must be a pandas DataFrame"
        # logger.error (not logger.exception): there is no active exception to attach here
        logger.error(msg)
        raise TypeError(msg)
    if not isinstance(horizontal_features, pd.DataFrame):
        msg = "horizontal_features must be a pandas DataFrame"
        logger.error(msg)
        raise TypeError(msg)

    # Check for empty DataFrames
    if vertical_features.empty or horizontal_features.empty:
        msg = "Cannot calculate correlations with empty DataFrames"
        logger.error(msg)
        raise ValueError(msg)

    if method not in ["pearson", "spearman", "kendall"]:
        msg = "Correlation method must be one of 'pearson', 'spearman', or 'kendall'."
        logger.error(msg)
        raise ValueError(msg)

    if not vertical_features.index.equals(horizontal_features.index):
        msg = "Vertical and horizontal features must have the same index to calculate correlation. Set the index to the intersection of patient IDs."
        logger.error(msg)
        raise ValueError(msg)

    # Add _ to beginning of feature names if they don't start with _ so they can be used as suffixes
    if not vertical_feature_name.startswith("_"):
        vertical_feature_name = f"_{vertical_feature_name}"
    if not horizontal_feature_name.startswith("_"):
        horizontal_feature_name = f"_{horizontal_feature_name}"

    # Join the features into one dataframe
    # Use inner join to keep only the rows that have a value in both vertical and horizontal features
    features_to_correlate = vertical_features.join(horizontal_features,
                                                   how='inner',
                                                   lsuffix=vertical_feature_name,
                                                   rsuffix=horizontal_feature_name)

    try:
        # Calculate correlation between vertical features and horizontal features
        correlation_matrix = features_to_correlate.corr(method=method)
    except Exception as e:
        msg = f"Error calculating correlation matrix: {e}"
        logger.exception(msg)
        # Bare raise re-raises the original exception with its traceback untouched
        raise

    return correlation_matrix



def getVerticalSelfCorrelations(correlation_matrix:pd.DataFrame,
                                num_vertical_features:int) -> pd.DataFrame:
    """Extract the vertical-vs-vertical block (top left quadrant) of a correlation matrix.

    Parameters
    ----------
    correlation_matrix : pd.DataFrame
        Correlation matrix whose leading rows and columns hold the vertical features.
    num_vertical_features : int
        How many vertical features the correlation matrix contains.

    Returns
    -------
    pd.DataFrame
        The first `num_vertical_features` rows and columns of `correlation_matrix`.

    Raises
    ------
    ValueError
        Re-raised, after logging, when `validateDataframeSubsetSelection` rejects the requested subset size.
    """
    try:
        validateDataframeSubsetSelection(correlation_matrix, num_vertical_features, num_vertical_features)
    except ValueError as error:
        logger.exception("Number of vertical features provided is greater than the number of rows or columns in the correlation matrix.")
        raise error

    # The same leading positional range is applied to rows and columns
    leading = slice(None, num_vertical_features)
    return correlation_matrix.iloc[leading, leading]



def getHorizontalSelfCorrelations(correlation_matrix:pd.DataFrame,
                                  num_horizontal_features:int) -> pd.DataFrame:
    """Get the horizontal (x-axis) self correlations from a correlation matrix.

    Gets the bottom right quadrant of the correlation matrix.

    Parameters
    ----------
    correlation_matrix : pd.DataFrame
        Dataframe containing the correlation matrix to get the horizontal self correlations from.
    num_horizontal_features : int
        Number of horizontal features in the correlation matrix.

    Returns
    -------
    pd.DataFrame
        Dataframe containing the horizontal self correlations from the correlation matrix.

    Raises
    ------
    ValueError
        Re-raised, after logging, when `validateDataframeSubsetSelection` rejects the requested subset size.
    """
    try:
        validateDataframeSubsetSelection(correlation_matrix, num_horizontal_features, num_horizontal_features)
    except ValueError:
        msg = "Number of horizontal features provided is greater than the number of rows or columns in the correlation matrix."
        logger.exception(msg)
        # Bare raise re-raises the original exception with its traceback untouched
        raise

    # Get the index of the start of the horizontal correlations
    # (the same offset, derived from the column count, is used for rows and columns)
    start_of_horizontal_correlations = len(correlation_matrix.columns) - num_horizontal_features

    # Get the correlation matrix for horizontal vs horizontal - this is the bottom right corner of the matrix
    return correlation_matrix.iloc[start_of_horizontal_correlations:, start_of_horizontal_correlations:]



def getCrossCorrelationMatrix(correlation_matrix:pd.DataFrame,
                              num_vertical_features:int) -> pd.DataFrame:
    """Extract the vertical-vs-horizontal block (top right quadrant) of a correlation matrix.

    Taking the top right quadrant keeps vertical features on the rows and
    horizontal features on the columns.

    Parameters
    ----------
    correlation_matrix : pd.DataFrame
        Correlation matrix to take the cross correlation subsection from.
    num_vertical_features : int
        How many vertical features the correlation matrix contains.

    Returns
    -------
    pd.DataFrame
        The first `num_vertical_features` rows and all columns after the first `num_vertical_features` columns.

    Raises
    ------
    ValueError
        Re-raised, after logging, when `validateDataframeSubsetSelection` rejects the requested subset size.
    """
    try:
        validateDataframeSubsetSelection(correlation_matrix, num_vertical_features, num_vertical_features)
    except ValueError as error:
        logger.exception("Number of vertical features provided is greater than the number of rows or columns in the correlation matrix.")
        raise error

    # Rows: the vertical features; columns: everything after them
    vertical_rows = slice(None, num_vertical_features)
    horizontal_cols = slice(num_vertical_features, None)
    return correlation_matrix.iloc[vertical_rows, horizontal_cols]
Loading

0 comments on commit 5dbfc73

Please sign in to comment.