Skip to content

Commit

Permalink
146 finish remaining python api (#148)
Browse files Browse the repository at this point in the history
* include pyo3 anyhow

* include BLAT and ucsc module for python module

* included python3 ucsc in modlist

* include into py dict for Blat object

* ran cargo fmt

* include pytest justfile

* add blat testing

* add python seq function to module

* pass in transcribe dereferenced

* include python seq in modlist

* use fastarecord and fastarecords to resultseq container and resultseq

* include fastarecord and fastarecords to uniprotinfo and container

* include fasta to utils

* include fasta record and records to utils

* added python submodule function for seq

* include testing for python seq command

* python API for info

* include python in info modlist

* implement into py dict for info container and info

* include python info in modlist for python api

* added function checking to mandate list for python seq

* include nolist testing in seq

* python testing for info

* added python blast to blast modlist

* added into py dict trait for blast result

* included python blast into python module

* api for python blast

* remove evalue testing since multiple are possible

* added testing for python blast api

* will add this back in once megablast is stable

* bump version
  • Loading branch information
noamteyssier authored Nov 1, 2022
1 parent 7663e88 commit 7bd4761
Show file tree
Hide file tree
Showing 30 changed files with 470 additions and 38 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "ggetrs"
version = "0.1.56"
version = "0.1.57"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -19,7 +19,7 @@ clap_complete = "4.0.3"
ftp = "3.0.1"
futures = "0.3.24"
mysql = "22.2.0"
pyo3 = { version = "0.16.5", features = ["extension-module"] }
pyo3 = { version = "0.16.5", features = ["extension-module", "anyhow"] }
regex = "1.6.0"
reqwest = { version = "0.11.11", features = ["json", "multipart", "blocking"] }
serde = { version = "1.0.144", features = ["derive"] }
Expand Down
3 changes: 3 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ doc:
test:
cargo nextest run --retries 10

pytest:
pytest -v --reruns 10 --reruns-delay 1

lint: build
cargo clippy -- \
-W clippy::pedantic \
Expand Down
4 changes: 0 additions & 4 deletions src/blast/functions/blast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,5 @@ mod testing {
assert_eq!(result.results()[0].query_end, 120);
assert_eq!(result.results()[0].subject_start, 4992);
assert_eq!(result.results()[0].subject_end, 5111);
assert!(
result.results()[0].evalue == 6.96927e-54 ||
result.results()[0].evalue == 6.97848e-54
); // results are returned in one of two orders
}
}
2 changes: 2 additions & 0 deletions src/blast/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pub mod cli;
pub mod functions;
pub mod types;
mod python;
pub use python::python_blast;
43 changes: 43 additions & 0 deletions src/blast/python.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use anyhow::{Result, bail};
use clap::ValueEnum;
use pyo3::{Python, types::{PyDict, IntoPyDict}, pyfunction};
use super::{types::{BlastProgram, BlastDatabase}, functions::blast};

// Python-facing wrapper around `blast`, registered under the Python name `blast`.
//
// Optional arguments fall back to the defaults advertised in the text
// signature: `limit = 50`, `expect = 10.0`, `low_comp_filter = False`,
// `megablast = True`. When given, `program` and `database` are parsed into
// their enum types; a name that cannot be parsed is reported as an error.
// Errors returned by `blast` are propagated unchanged.
//
// NOTE: plain `//` comments are used on purpose here, since `///` doc comments
// on a `#[pyfunction]` are picked up as the Python docstring.
#[pyfunction(name = "blast")]
#[pyo3(text_signature = "(query, program = None, database = None, limit = 50, expect = 10.0, low_comp_filter = False, megablast = True)")]
pub fn python_blast<'py>(
    py: Python<'py>,
    query: &str,
    program: Option<String>,
    database: Option<String>,
    limit: Option<usize>,
    expect: Option<f64>,
    low_comp_filter: Option<bool>,
    megablast: Option<bool>,
) -> Result<&'py PyDict> {
    // Resolve the optional program name; the `true` flag requests
    // case-insensitive matching.
    let program = match program.as_deref() {
        None => None,
        Some(name) => match BlastProgram::from_str(name, true) {
            Ok(parsed) => Some(parsed),
            Err(_) => bail!("Could not assign blast program from input"),
        },
    };

    // Same resolution for the optional database name.
    let database = match database.as_deref() {
        None => None,
        Some(name) => match BlastDatabase::from_str(name, true) {
            Ok(parsed) => Some(parsed),
            Err(_) => bail!("Could not assign blast database from input"),
        },
    };

    let response = blast(
        query,
        &program,
        &database,
        limit.unwrap_or(50),
        expect.unwrap_or(10.0),
        low_comp_filter.unwrap_or(false),
        megablast.unwrap_or(true),
    )?;
    Ok(response.into_py_dict(py))
}
38 changes: 37 additions & 1 deletion src/blast/types/result.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use pyo3::types::{IntoPyDict, PyDict};
use serde::{Deserialize, Serialize};
use std::fmt;

Expand All @@ -15,6 +16,21 @@ impl fmt::Display for BlastResult {
)
}
}
impl IntoPyDict for BlastResult {
    /// Converts the result into a Python dict with two entries:
    /// `"query"` (the stored query) and `"results"` (a list holding one dict
    /// per hit, built with each hit's own `into_py_dict`).
    ///
    /// # Panics
    ///
    /// Panics if inserting an entry into the Python dict fails; the trait
    /// signature leaves no way to return the error.
    fn into_py_dict(self, py: pyo3::Python<'_>) -> &PyDict {
        let map = PyDict::new(py);
        map.set_item("query", self.query).unwrap();

        // `self` is consumed by this call, so the hits are moved out
        // directly rather than cloned one by one.
        let hits: Vec<&PyDict> = self
            .results
            .into_iter()
            .map(|hit| hit.into_py_dict(py))
            .collect();
        map.set_item("results", hits).unwrap();
        map
    }
}
impl BlastResult {
pub fn from_blast_output(output: &BlastOutput, query: &str) -> Self {
Self {
Expand All @@ -37,7 +53,7 @@ impl BlastResult {
}
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BlastHit {
pub num: usize,
pub id: String,
Expand Down Expand Up @@ -75,6 +91,26 @@ impl BlastHit {
}
}
}
impl IntoPyDict for BlastHit {
    /// Converts the hit into a Python dict with one entry per field, keyed by
    /// the field name.
    ///
    /// Every key is populated from the field of the same name. (Previously
    /// each key was filled with `self.num`, so all values in the dict were
    /// the hit number.)
    ///
    /// # Panics
    ///
    /// Panics if inserting an entry into the Python dict fails; the trait
    /// signature leaves no way to return the error.
    fn into_py_dict(self, py: pyo3::Python<'_>) -> &pyo3::types::PyDict {
        let map = PyDict::new(py);
        map.set_item("num", self.num).unwrap();
        map.set_item("id", self.id).unwrap();
        map.set_item("definition", self.definition).unwrap();
        map.set_item("accession", self.accession).unwrap();
        map.set_item("length", self.length).unwrap();
        map.set_item("bit_score", self.bit_score).unwrap();
        map.set_item("score", self.score).unwrap();
        map.set_item("evalue", self.evalue).unwrap();
        map.set_item("gap_opens", self.gap_opens).unwrap();
        map.set_item("alignment_length", self.alignment_length).unwrap();
        map.set_item("query_start", self.query_start).unwrap();
        map.set_item("query_end", self.query_end).unwrap();
        map.set_item("subject_start", self.subject_start).unwrap();
        map.set_item("subject_end", self.subject_end).unwrap();
        map
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub struct BlastOutput {
Expand Down
2 changes: 1 addition & 1 deletion src/cli/cli.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{ModArchS4, ModChembl, ModEnsembl, ModNcbi, ModPdb, ModUcsc, ModUniprot, ModEnrichr};
use super::{ModArchS4, ModChembl, ModEnrichr, ModEnsembl, ModNcbi, ModPdb, ModUcsc, ModUniprot};
use crate::{
blast::types::{BlastDatabase, BlastProgram},
ensembl::ENSEMBL_RELEASE_STR,
Expand Down
5 changes: 1 addition & 4 deletions src/cli/enrichr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use clap::Subcommand;

#[derive(Subcommand)]
pub enum ModEnrichr {

Enrichr {
/// any database listed at: https://maayanlab.cloud/Enrichr/#libraries
/// some shorthands include: pathway, transcription, ontology, diseases_drugs, celltypes,
Expand All @@ -21,7 +20,6 @@ pub enum ModEnrichr {

/// List all available libraries and their descriptions
List {

/// Return library names in plaintext
#[clap(short, long)]
minimal: bool,
Expand All @@ -37,6 +35,5 @@ pub enum ModEnrichr {
/// optional filepath to write output to [default=stdout]
#[clap(short, long)]
output: Option<String>,
}

},
}
7 changes: 6 additions & 1 deletion src/enrichr/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ pub fn launch_enrichr(library: &str, gene_list: &[String], output: &Option<Strin
}

/// Main entrypoint for listing all available libraries in `Enrichr`
pub fn launch_enrichr_list(minimal: bool, list_categories: bool, category: &Option<usize>, output: &Option<String>) -> Result<()> {
pub fn launch_enrichr_list(
minimal: bool,
list_categories: bool,
category: &Option<usize>,
output: &Option<String>,
) -> Result<()> {
let libraries = get_libraries()?;

let output_str = if list_categories {
Expand Down
4 changes: 2 additions & 2 deletions src/enrichr/functions/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
mod add_list;
mod enrich;
mod get_libraries;
mod view_list;
mod shorthand;
mod view_list;
pub use add_list::add_list;
pub use enrich::enrich;
pub use get_libraries::get_libraries;
pub use view_list::view_list;
pub use shorthand::shorthand;
pub use view_list::view_list;
6 changes: 3 additions & 3 deletions src/enrichr/functions/shorthand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ pub fn shorthand(library: &str) -> String {
"diseases_drugs" => "GWAS_Catalog_2019",
"celltypes" => "PanglaoDB_Augmented_2021",
"kinase_interactions" => "KEA_2015",
x => x
}.to_string()
x => x,
}
.to_string()
}

#[cfg(test)]
mod testing {
use super::shorthand;


#[test]
fn test_shorthand_pathway() {
let lib = "pathway";
Expand Down
5 changes: 2 additions & 3 deletions src/enrichr/types/library.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ impl Libraries {
}
}


/// An instance of category contained within `Enrichr`
///
/// Data is stored as a json at <https://maayanlab.cloud/Enrichr/datasetStatistics>
Expand Down Expand Up @@ -91,7 +90,6 @@ impl fmt::Display for Categories {
}
}


/// All libraries contained within `Enrichr`.
///
/// The `statistics` attribute is a container of all known [Library].
Expand All @@ -109,7 +107,8 @@ impl ResponseLibraries {
Categories(self.categories.to_owned())
}
pub fn filter_categories(&self, cid: usize) -> Libraries {
let libraries = self.statistics
let libraries = self
.statistics
.iter()
.filter(|x| x.category_id == cid)
.map(|x| x.to_owned())
Expand Down
2 changes: 2 additions & 0 deletions src/info/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
mod cli;
pub mod functions;
pub mod types;
mod python;
pub use cli::launch_info;
pub use functions::info;
pub use types::Info;
pub use python::python_info;
23 changes: 23 additions & 0 deletions src/info/python.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use anyhow::{bail, Result};
use pyo3::{types::{PyDict, IntoPyDict}, Python, pyfunction};
use super::info;


/// Look up information for each of the given search terms.
///
/// `search_terms` must be a non-empty list of strings. `species` defaults to
/// `"homo_sapiens"` and `taxon_id` defaults to `9606` when omitted.
///
/// Returns a dict built from the results of the underlying `info` query.
///
/// Raises an error if `search_terms` is empty, if its first element is a
/// single character, or if the underlying query fails.
#[pyfunction(name = "info")]
#[pyo3(text_signature = "(search_terms, species = 'homo_sapiens', taxon_id = 9606)")]
pub fn python_info<'py>(
    py: Python<'py>,
    search_terms: Vec<String>,
    species: Option<String>,
    taxon_id: Option<usize>,
) -> Result<&'py PyDict> {
    match search_terms.first() {
        // A single term is accepted, so the requirement is "at least one".
        None => bail!("Must pass in at least one search term!"),
        // NOTE(review): a one-character first element is presumably a bare
        // Python string that was split into characters instead of being
        // wrapped in a list -- confirm against the Python-side tests.
        Some(first) if first.len() == 1 => bail!("Must pass in search terms as a list!"),
        Some(_) => {}
    }

    // Build the default species string only when the caller gave none.
    let species = species.unwrap_or_else(|| "homo_sapiens".to_string());
    let taxon_id = taxon_id.unwrap_or(9606);
    let results = info(&search_terms, &species, taxon_id)?;
    Ok(results.into_py_dict(py))
}
31 changes: 30 additions & 1 deletion src/info/types.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{
ensembl::types::LookupResponse, ncbi::types::NcbiResults, uniprot::UniprotInfoContainer,
};
use pyo3::types::{IntoPyDict, PyDict};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt;
Expand All @@ -17,6 +18,17 @@ impl fmt::Display for InfoContainer {
)
}
}
impl IntoPyDict for InfoContainer {
    /// Converts the container into a Python dict that maps each key of the
    /// wrapped collection to the dict form of its entry.
    ///
    /// # Panics
    ///
    /// Panics if inserting an entry into the Python dict fails; the trait
    /// signature leaves no way to return the error.
    fn into_py_dict(self, py: pyo3::Python<'_>) -> &PyDict {
        let map = PyDict::new(py);
        // `self` is consumed by this call, so entries are moved out of the
        // wrapped collection rather than cloned.
        for (key, entry) in self.0 {
            map.set_item(key, entry.into_py_dict(py)).unwrap();
        }
        map
    }
}
impl InfoContainer {
#[must_use]
pub fn from_queries(
Expand All @@ -36,7 +48,7 @@ impl InfoContainer {
}

/// Container which aggregates query results from multiple databases
#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Info {
ensembl_id: String,
uniprot_id: String,
Expand All @@ -58,6 +70,23 @@ impl fmt::Display for Info {
)
}
}
impl IntoPyDict for Info {
fn into_py_dict(self, py: pyo3::Python<'_>) -> &pyo3::types::PyDict {
let map = PyDict::new(py);
map.set_item("ensembl_id", &self.ensembl_id).unwrap();
map.set_item("uniprot_id", &self.uniprot_id).unwrap();
map.set_item("ncbi_id", &self.ncbi_id).unwrap();
map.set_item("symbol", &self.symbol).unwrap();
map.set_item("pdb_id", &self.pdb_id).unwrap();
map.set_item("ensembl_description", &self.ensembl_description).unwrap();
map.set_item("uniprot_description", &self.uniprot_description).unwrap();
map.set_item("ncbi_description", &self.ncbi_description).unwrap();
map.set_item("species", &self.species).unwrap();
map.set_item("assembly_name", &self.assembly_name).unwrap();
map
}

}
impl Info {
#[must_use]
pub fn from_queries(
Expand Down
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ fn ggetrs(py: Python<'_>, module: &PyModule) -> PyResult<()> {
module.add_function(wrap_pyfunction!(enrichr::python_enrichr, module)?)?;
archs4::python_archs4(py, module)?;
ensembl::python_ensembl(py, module)?;
ucsc::python_ucsc(py, module)?;
module.add_function(wrap_pyfunction!(ensembl::python_ensembl_search, module)?)?;
module.add_function(wrap_pyfunction!(seq::python_seq, module)?)?;
module.add_function(wrap_pyfunction!(info::python_info, module)?)?;
module.add_function(wrap_pyfunction!(blast::python_blast, module)?)?;
Ok(())
}
16 changes: 12 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ use ggetrs::{
archs4::{launch_archs4_correlation, launch_archs4_tissue},
blast::cli::launch_blast,
chembl::launch_chembl_activity,
cli::{Cli, Commands, ModArchS4, ModChembl, ModEnsembl, ModNcbi, ModPdb, ModUcsc, ModUniprot, ModEnrichr},
cli::{
Cli, Commands, ModArchS4, ModChembl, ModEnrichr, ModEnsembl, ModNcbi, ModPdb, ModUcsc,
ModUniprot,
},
enrichr::{launch_enrichr, launch_enrichr_list},
ensembl::{
launch_ensembl_database, launch_ensembl_list_species, launch_ensembl_lookup_id,
Expand All @@ -30,8 +33,13 @@ fn main() -> Result<(), RequestError> {
output,
} => {
launch_enrichr(library, gene_list, output)?;
},
ModEnrichr::List { minimal, list_categories, category, output } => {
}
ModEnrichr::List {
minimal,
list_categories,
category,
output,
} => {
launch_enrichr_list(*minimal, *list_categories, category, output)?;
}
},
Expand Down Expand Up @@ -206,7 +214,7 @@ fn main() -> Result<(), RequestError> {
species,
output,
} => {
launch_seq(search_terms, &transcribe, species, output)?;
launch_seq(search_terms, *transcribe, species, output)?;
}
Commands::Blast {
query,
Expand Down
Loading

0 comments on commit 7bd4761

Please sign in to comment.