-
Notifications
You must be signed in to change notification settings - Fork 0
/
t6.py
28 lines (20 loc) · 1.03 KB
/
t6.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import scanpy as sc
import pandas as pd
# Compare summary statistics of the cell-level metadata (adata.obs) between
# the "male" and "female" cohorts and write the differing statistics to CSV.

# Load the dataset
adata = sc.read_h5ad("Al.h5ad")

# Check the unique values in the 'sex' column to confirm labels
print("Unique values in 'sex':", adata.obs['sex'].unique())

# Separate the observation metadata by gender; copies keep the later
# in-place index reset from touching adata.obs itself.
sex_labels = adata.obs['sex']
male_data = adata.obs[sex_labels == "male"].copy()
female_data = adata.obs[sex_labels == "female"].copy()

# Reset index for each group to clean up the dataframes
male_data.reset_index(inplace=True, drop=True)
female_data.reset_index(inplace=True, drop=True)

# Perform comparison of summary statistics between male and female cohorts.
# DataFrame.compare keeps only the statistics that differ between the two
# describe() tables; cells that are equal in both show up as NaN.
male_summary = male_data.describe()
female_summary = female_data.describe()
comparison_gender = male_summary.compare(female_summary)

# compare() returns two-level columns: the outer level is the original obs
# column name and the inner level is 'self' / 'other'. Relabel ONLY the inner
# level so the outer column names are preserved ('self' is the male table the
# method was called on, 'other' is the female table passed as the argument).
comparison_gender = comparison_gender.rename(
    columns={'self': 'Male', 'other': 'Female'},
    level=1,
)

# Display the comparison result
print("Comparison of summary statistics between male and female cohorts:")
print(comparison_gender)

# Persist the same table for downstream use
comparison_gender.to_csv("gender_cohort_comparison.csv")