|
| 1 | +import sys |
| 2 | +from pathlib import Path |
| 3 | +import pytest |
| 4 | + |
| 5 | +import mudata as md |
| 6 | +import numpy as np |
| 7 | +import scanpy as sc |
| 8 | +import muon as mu |
| 9 | +import pandas as pd |
| 10 | + |
## VIASH START
# Placeholder settings used when this file is run directly.
# NOTE(review): the VIASH START/END markers suggest this block is overwritten
# by Viash at build time -- confirm before relying on these values elsewhere.
meta = {
    'executable': './target/docker/qc/calculate_atac_qc_metrics/calculate_atac_qc_metrics',
    # Must point at the directory that CONTAINS "counts": the tiny_atac_mudata
    # fixture appends "counts" itself, so ending this path in "counts/" made
    # local runs look for ".../counts/counts/<file>".
    'resources_dir': "./resources_test/cellranger_atac_tiny_bcl/",
    'config': './src/qc/calculate_atac_qc_metrics/config.vsh.yaml',
    'cpus': 2
}
## VIASH END
| 19 | + |
@pytest.fixture
def synthetic_example():
    """Build a small in-memory MuData with one hand-written "atac" modality.

    The matrix has five observations (A-E) and three variables (x, y, z);
    the values are small enough that the expected QC metrics can be worked
    out by hand in the tests.
    """
    counts = np.array([
        [0, 0, 0],
        [1, 0, 1],
        [10, 0, 0],
        [100, 0, 1],
        [1000, 0, 0],
    ])

    atac = sc.AnnData(counts)
    atac.obs_names = list("ABCDE")
    atac.var_names = list("xyz")

    return md.MuData({"atac": atac})
| 33 | + |
@pytest.fixture
def example_mudata(tmp_path, synthetic_example):
    """Write the synthetic MuData to ``example.h5mu`` in tmp_path; return the path."""
    target = tmp_path / "example.h5mu"
    synthetic_example.write(target)
    return target
| 40 | + |
@pytest.fixture
def example_mudata_with_layer(tmp_path, synthetic_example):
    """Write a variant of the synthetic MuData whose counts live in a layer.

    The original matrix is copied into the ``atac_counts`` layer and ``X`` is
    overwritten with normally distributed values, so ``X`` no longer holds the
    counts and only the layer does.

    Returns:
        Path to the written ``example.h5mu`` file inside ``tmp_path``.
    """
    atac = synthetic_example.mod["atac"]
    atac.layers["atac_counts"] = atac.X.copy()

    # Use a seeded, local generator so the fixture file is identical on every
    # run instead of depending on the state of NumPy's global RNG.
    rng = np.random.default_rng(seed=0)
    atac.X = rng.normal(size=atac.X.shape)

    mdata_path = tmp_path / "example.h5mu"
    synthetic_example.write(mdata_path)

    return mdata_path
| 49 | + |
@pytest.fixture
def neurips_mudata(tmp_path):
    """Wrap the NeurIPS dataset as an "atac" modality and write it to tmp_path.

    Data comes from the `NeurIPS Multimodal Single-Cell Integration Challenge
    <https://www.kaggle.com/competitions/open-problems-multimodal/data>`_.

    The download link is taken from the Moscot repository:
    https://github.com/theislab/moscot/blob/cb53435c80fafe58046ead3c42a767fd0b818aaa/src/moscot/datasets.py#L67

    Returns the path of the written ``neurips.h5mu`` file.
    """
    neurips_adata = sc.read(
        "../data/neurips_data.h5ad",
        backup_url="https://figshare.com/ndownloader/files/37993503",
    )

    target = tmp_path / "neurips.h5mu"
    md.MuData({"atac": neurips_adata}).write(target)

    return target
| 66 | + |
@pytest.fixture
def tiny_atac_mudata(tmp_path):
    """Build a MuData from the files under ``<resources_dir>/counts``.

    Reads the filtered peak matrix, registers the fragments file on the
    "atac" modality, stores the peak annotation table in ``uns`` and writes
    everything to ``tiny_atac.h5mu`` in tmp_path. Returns that path.
    """
    counts_dir = Path(meta["resources_dir"]) / "counts"

    mdata = mu.read_10x_h5(counts_dir / "filtered_peak_bc_matrix.h5")
    mu.atac.tl.locate_fragments(mdata, fragments=str(counts_dir / "fragments.tsv.gz"))

    # Sanity check: the fragments file must now be registered in uns.
    atac_uns = mdata.mod["atac"].uns
    assert "files" in atac_uns
    assert "fragments" in atac_uns["files"]

    # Load the feature annotation and keep it in uns next to the data.
    annotation = pd.read_csv(counts_dir / "peak_annotation.tsv", sep="\t")
    annotation.columns = ["Chromosome", "Start", "End", "gene", "distance", "peak_type"]
    # Cast to str; the original author notes this fixes a saving error.
    annotation["gene"] = annotation["gene"].astype(str)
    mdata.mod["atac"].uns["peak_annotation"] = annotation

    target = tmp_path / "tiny_atac.h5mu"
    mdata.write(target)

    return target
| 85 | + |
@pytest.mark.parametrize("mudata", ["example_mudata", "neurips_mudata", "tiny_atac_mudata"])
def test_qc_columns_in_tables(run_component, request, mudata, tmp_path):
    """Run the component on each input fixture and check the QC columns exist.

    ``mudata`` is the name of a fixture that yields the path of an input
    ``.h5mu`` file; it is resolved lazily through ``request``.
    """
    source = request.getfixturevalue(mudata)
    output_path = tmp_path / "foo.h5mu"

    run_component([
        "--input", str(source),
        "--output", str(output_path),
        "--modality", "atac",
        "--n_fragments_for_nucleosome_signal", "100",
    ])
    assert output_path.is_file()

    atac = md.read(output_path).mod["atac"]

    obs_metrics = ("n_features_per_cell", "total_fragment_counts", "log_total_fragment_counts")
    var_metrics = ("n_cells_by_counts", "mean_counts", "pct_dropout_by_counts", "total_counts")
    for column in obs_metrics:
        assert column in atac.obs
    for column in var_metrics:
        assert column in atac.var

    # ATAC-specific metrics are only expected when the output carries the
    # fragments information (the tiny ATAC data).
    has_fragments = "files" in atac.uns and "fragments" in atac.uns["files"]
    if has_fragments:
        assert "nucleosome_signal" in atac.obs

    if "peak_annotation" in atac.uns:
        assert "tss_score" in atac.obs
| 113 | + |
| 114 | + |
@pytest.mark.parametrize("mudata", ["example_mudata", "example_mudata_with_layer"])
def test_calculations_correctness(request, run_component, mudata, tmp_path):
    """Compare the computed QC metrics against hand-calculated values.

    Runs once with the counts in ``X`` and once with the counts in the
    ``atac_counts`` layer (selected through ``--layer``); both runs must
    produce the same numbers.
    """
    source = request.getfixturevalue(mudata)
    output_path = tmp_path / "foo.h5mu"

    cli_args = [
        "--input", str(source),
        "--output", str(output_path),
        "--modality", "atac",
        "--n_fragments_for_nucleosome_signal", "100",
    ]
    if mudata == "example_mudata_with_layer":
        cli_args += ["--layer", "atac_counts"]

    run_component(cli_args)
    assert output_path.is_file()

    atac = md.read(output_path).mod["atac"]

    # Expected per-observation values, in obs order A-E.
    expected_obs = {
        "n_features_per_cell": [0, 2, 1, 2, 1],
        "total_fragment_counts": [0, 2, 10, 101, 1000],
        # The first observation has no counts, hence log10(0) = -inf.
        "log_total_fragment_counts": [-np.inf, *np.log10([2, 10, 101, 1000])],
    }
    for column, values in expected_obs.items():
        assert np.allclose(atac.obs[column], values)

    # Expected per-variable values, in var order x, y, z.
    expected_var = {
        "n_cells_by_counts": [4, 0, 2],
        "mean_counts": [222.2, 0, 0.4],
        "pct_dropout_by_counts": [20, 100, 60],
        "total_counts": [1111, 0, 2],
    }
    for column, values in expected_var.items():
        assert np.allclose(atac.var[column], values)
| 142 | + |
| 143 | + |
if __name__ == "__main__":
    # Run this file's tests through pytest and exit with its status code.
    exit_status = pytest.main([__file__])
    sys.exit(exit_status)
0 commit comments