Skip to content

Commit

Permalink
Fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
kvg committed Jan 8, 2024
1 parent f453a66 commit aabfe8a
Show file tree
Hide file tree
Showing 8 changed files with 411 additions and 367 deletions.
29 changes: 29 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
BSD 3-Clause License

Copyright (c) 2019, Broad Institute
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 changes: 43 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
variantreviewparty
""""""""""""""""""

|GitHub release| |Generic badge| |PyPI version variantreviewparty|

.. |GitHub release| image:: https://img.shields.io/github/release/broadinstitute/variantreviewparty.svg
:target: https://github.com/broadinstitute/variantreviewparty/releases/

.. |Generic badge| image:: https://img.shields.io/badge/Docker-v0.0.1-blue.svg
:target: https://console.cloud.google.com/gcr/images/broad-dsp-lrma/US/lr-variantreviewparty

.. |PyPI version variantreviewparty| image:: https://img.shields.io/pypi/v/variantreviewparty.svg
:target: https://pypi.python.org/pypi/variantreviewparty/

VariantReviewParty is a Python library for viewing read-level data spanning variants across thousands of samples.

Documentation for the ``VariantReviewParty`` API can be found on the `documentation page <https://broadinstitute.github.io/variantreviewparty/>`_.

Installation
------------

``pip`` is recommended for VariantReviewParty installation.

::

pip install variantreviewparty

For a pre-built version including all dependencies, access our Docker image. To install from source instead, clone the repository and install it in editable mode.

::

git clone https://github.com/broadinstitute/variantreviewparty.git
pip install -e variantreviewparty/

Getting help
------------

If you encounter bugs or have questions/comments/concerns, please file an issue on our `GitHub page <https://github.com/broadinstitute/variantreviewparty/issues>`_.

Developers' guide
-----------------

For information on contributing to VariantReviewParty development, visit our `developer documentation <DEVELOP.md>`_.
586 changes: 261 additions & 325 deletions playground.ipynb

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[build-system]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"

[project]
name = "genomeshader"

[tool.maturin]
python-source = "python"
# "extension-module" tells pyo3 we want to build an extension module (skips linking against libpython.so)
features = ["pyo3/extension-module"]
65 changes: 38 additions & 27 deletions python/genomeshader/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
import warnings
from enum import Enum
from typing import Union, List

import polars as pl
import holoviews as hv
Expand Down Expand Up @@ -37,16 +38,21 @@ def __init__(self,
self.session_name = session_name

if gcs_session_dir is None:
bucket = os.environ['GOOGLE_BUCKET']
gcs_session_dir = f"{bucket}/GenomeShader/{session_name}"
if 'GOOGLE_BUCKET' in os.environ:
bucket = os.environ['GOOGLE_BUCKET']
gcs_session_dir = f"{bucket}/GenomeShader/{session_name}"
else:
raise ValueError(
"gcs_session_dir is None and "
"GOOGLE_BUCKET is not set in environment variables"
)

self._validate_gcs_session_dir(gcs_session_dir)
self.gcs_session_dir = gcs_session_dir

self.genome_build: GenomeBuild = genome_build

self.reads = set()
self.loci = set()
self._session = _init()

def _validate_gcs_session_dir(self, gcs_session_dir: str):
gcs_pattern = re.compile(
Expand All @@ -68,38 +74,37 @@ def _validate_session_name(self, session_name: str):

def __str__(self):
return (
f'GenomeShader: '
f'session_name={self.session_name}, '
f'gcs_session_dir={self.gcs_session_dir}, '
f'genome_build={self.genome_build}'
f'GenomeShader:\n'
f' - session_name: {self.session_name}\n'
f' - gcs_session_dir: {self.gcs_session_dir}\n'
f' - genome_build: {self.genome_build}\n'
)

def get_session_name(self):
return self.session_name

def attach_reads(self, gcs_path: str):
if gcs_path.endswith('.bam') or gcs_path.endswith('.cram'):
self.reads.add(gcs_path)
else:
bams = gcs_list_files_of_type(gcs_path, ".bam")
crams = gcs_list_files_of_type(gcs_path, ".cram")
def attach_reads(self, gcs_paths: Union[str, List[str]]):
if isinstance(gcs_paths, str):
gcs_paths = [gcs_paths] # Convert single string to list

self.reads.update(bams)
self.reads.update(crams)
for gcs_path in gcs_paths:
if gcs_path.endswith(".bam") or gcs_path.endswith(".cram"):
self._session.attach_reads([gcs_path])
else:
bams = _gcs_list_files_of_type(gcs_path, ".bam")
crams = _gcs_list_files_of_type(gcs_path, ".cram")

def attach_locus(self, locus: str):
pieces = re.split("[:-]", re.sub(",", "", locus))
self._session.attach_reads(bams)
self._session.attach_reads(crams)

chr = pieces[0]
start = int(pieces[1])
stop = int(pieces[2]) if len(pieces) > 2 else start

self.loci.add((chr, start, stop))
def attach_loci(self, loci: Union[str, List[str]]):
if isinstance(loci, str):
self._session.attach_loci([loci])
else:
self._session.attach_loci(loci)

def stage(self):
df = stage_data(self.gcs_session_dir, self.reads, self.loci)

return df
self._session.stage()

def show(self,
locus: str,
Expand All @@ -116,6 +121,8 @@ def show(self,
df = pl.read_parquet(filename)
df = df.sort(["sample_name", "query_name", "reference_start"])

print(filename)

y0s = []
y0 = 0
if collapse:
Expand Down Expand Up @@ -144,6 +151,7 @@ def show(self,
y0s.append(y0)

df = df.with_columns(pl.Series(name="read_num", values=y0s))
df = df.with_columns(pl.Series(name="height", values=[1.0]*len(y0s)))

df = df.with_columns(
pl.col("read_num").alias("y0") * -1 - pl.col("height") / 2
Expand Down Expand Up @@ -180,8 +188,11 @@ def show(self,
default_tools=['reset', 'save']
)

def print(self):
self._session.print()


def init(session_name,
def init(session_name: str,
gcs_session_dir: str = None,
genome_build: GenomeBuild = GenomeBuild.GRCh38) -> GenomeShader:
session = GenomeShader(session_name=session_name,
Expand Down
2 changes: 0 additions & 2 deletions src/alignment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,6 @@ fn extract_reads(bam_path: &String, chr: String, start: u64, stop: u64) -> DataF
}

pub fn stage_data(cache_path: PathBuf, bam_paths: &HashSet<String>, loci: &HashSet<(String, u64, u64)>) -> Result<HashMap<(String, u64, u64), PathBuf>, Box<dyn std::error::Error>> {
gcs_authorize_data_access();

loci.par_iter()
.progress_count(loci.len() as u64)
.for_each(|l| {
Expand Down
36 changes: 24 additions & 12 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub mod layout;
use app::{model, update, exit};
use events::raw_window_event;
use alignment::stage_data;
use storage::gcs_list_files_of_type;
use storage::{_gcs_list_files_of_type,gcs_authorize_data_access};
use layout::*;

use std::{collections::{HashSet, HashMap}, path::PathBuf, cell::RefCell};
Expand All @@ -23,7 +23,7 @@ thread_local!(static GLOBAL_DATA: RefCell<PyDataFrame> = RefCell::new(PyDataFram

#[pyclass]
pub struct Session {
bams: HashSet<String>,
reads: HashSet<String>,
loci: HashSet<(String, u64, u64)>,
staged_data: HashMap<(String, u64, u64), PathBuf>
}
Expand All @@ -33,14 +33,24 @@ impl Session {
#[new]
fn new() -> Self {
Session {
bams: HashSet::new(),
reads: HashSet::new(),
loci: HashSet::new(),
staged_data: HashMap::new()
}
}

fn attach_bams(&mut self, bams: Vec<String>) {
self.bams = bams.into_iter().collect();
/// Attach read files (BAM/CRAM paths) to this session.
///
/// Every entry in `reads` must end in `.bam` or `.cram`.
///
/// Paths accumulate across calls; duplicates collapse because the backing
/// store is a set. Callers can therefore attach reads in several batches
/// (e.g. one call for BAMs and another for CRAMs) without losing the
/// earlier ones.
///
/// # Errors
///
/// Returns a `PyValueError` naming the first path that has neither suffix.
/// The whole batch is validated before anything is stored, so on error the
/// session's read set is left unchanged.
fn attach_reads(&mut self, reads: Vec<String>) -> PyResult<()> {
    // Validate up front so a bad path cannot leave the batch half-applied.
    if let Some(bad) = reads
        .iter()
        .find(|path| !path.ends_with(".bam") && !path.ends_with(".cram"))
    {
        return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
            format!("File '{}' is not a .bam or .cram file.", bad)
        ));
    }

    // Extend rather than overwrite: assigning a fresh set here would throw
    // away everything attached by previous calls.
    self.reads.extend(reads);

    Ok(())
}

fn parse_locus(&self, locus: String) -> PyResult<(String, u64, u64)> {
Expand Down Expand Up @@ -100,9 +110,11 @@ impl Session {
}

fn stage(&mut self) -> PyResult<()> {
gcs_authorize_data_access();

let cache_path = std::env::temp_dir();

match stage_data(cache_path, &self.bams, &self.loci) {
match stage_data(cache_path, &self.reads, &self.loci) {
Ok(staged_data) => { self.staged_data = staged_data; },
Err(_) => {
return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
Expand Down Expand Up @@ -158,9 +170,9 @@ impl Session {
}

fn print(&self) {
println!("BAMs:");
for bam in &self.bams {
println!(" - {}", bam);
println!("Reads:");
for reads in &self.reads {
println!(" - {}", reads);
}

println!("Loci:");
Expand All @@ -176,7 +188,7 @@ impl Session {
}

#[pyfunction]
fn init() -> PyResult<Session> {
fn _init() -> PyResult<Session> {
Ok(Session::new())
}

Expand All @@ -185,8 +197,8 @@ fn init() -> PyResult<Session> {
/// import the module.
#[pymodule]
fn genomeshader(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(gcs_list_files_of_type, m)?)?;
m.add_function(wrap_pyfunction!(init, m)?)?;
m.add_function(wrap_pyfunction!(_gcs_list_files_of_type, m)?)?;
m.add_function(wrap_pyfunction!(_init, m)?)?;

Ok(())
}
6 changes: 5 additions & 1 deletion src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ pub fn gcs_authorize_data_access() {
.output()
.expect("Failed to execute command");

if !output.status.success() {
panic!("{}", String::from_utf8_lossy(&output.stderr));
}

// Decode the output and remove trailing newline
let token = String::from_utf8(output.stdout)
.expect("Failed to decode output")
Expand All @@ -40,7 +44,7 @@ pub fn gcs_authorize_data_access() {
}

#[pyfunction]
pub fn gcs_list_files_of_type(path: String, suffix: &str) -> PyResult<Vec<String>> {
pub fn _gcs_list_files_of_type(path: String, suffix: &str) -> PyResult<Vec<String>> {
let file_list = gcs_list_files(&path).unwrap();

let bam_files: Vec<_> = file_list.iter().flat_map(|fs| {
Expand Down

0 comments on commit aabfe8a

Please sign in to comment.