From ec9f22816efdbbb10ec97f6f797f67dc4f7e0274 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Fri, 10 Nov 2023 12:42:43 -0600 Subject: [PATCH] feat: add plotters and rayon dependencies --- Cargo.toml | 2 + src/graph.rs | 44 +++++++++++++++-- src/graph/cluster.rs | 0 src/graph/distance.rs | 0 src/graph/load.rs | 17 +++++-- src/graph/vis.rs | 112 ++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 2 +- 7 files changed, 169 insertions(+), 8 deletions(-) create mode 100644 src/graph/cluster.rs create mode 100644 src/graph/distance.rs diff --git a/Cargo.toml b/Cargo.toml index 12335f9..aa85a89 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,6 +54,8 @@ noodles-fastq = "0.9" noodles-sam = "0.46" petgraph = { version = "0.6", features = ["serde-1"] } +plotters = { version = "0.3" } +rayon = { version = "1.8" } regex = { version = "1.10" } serde = { version = "1.0" } serde_json = { version = "1.0" } diff --git a/src/graph.rs b/src/graph.rs index cd60dc7..9abe1d7 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -1,6 +1,7 @@ use anyhow::Result; use clap::Args; use clap::ValueHint; +use log::error; use std::io::BufWriter; use std::io::Write; use std::path::PathBuf; @@ -8,20 +9,57 @@ use std::path::PathBuf; mod analysis; mod data; mod load; +// mod vis; use analysis::GraphAnalysis; +use log::info; use log::warn; +use self::data::NLGraph; + #[derive(Args, Debug)] pub struct GraphArgs { /// Graph input file - #[arg(value_hint = ValueHint::FilePath)] + #[arg(value_hint = ValueHint::AnyPath)] input: PathBuf, + + /// current threads number + #[arg(short = 't', default_value = "2")] + threads: usize, +} + +pub fn analyze(args: &GraphArgs) -> Result<()> { + if args.input.is_dir() { + info!("Analyzing graphs in directory {}", args.input.display()); + rayon::ThreadPoolBuilder::new() + .num_threads(args.threads) + .build_global() + .unwrap(); + + let mut nlgraphs = load::load_cygraph_from_directory(&args.input)?; + return analyze_nlgraphs(&mut nlgraphs); + } else if 
args.input.is_file() { + info!("Analyzing graph in file {}", args.input.display()); + let mut nlgraph = load::load_cygraph_from_file(&args.input)?; + return analyze_nlgraph(&mut nlgraph); + } + error!("Input is not a file or directory"); + Ok(()) } -pub fn analyze_nlgraph(args: &GraphArgs) -> Result<()> { - let mut nlgraph = load::load_cygraph_from_file(&args.input)?; +pub fn analyze_nlgraphs(nlgraphs: &mut [NLGraph]) -> Result<()> { + rayon::scope(|s| { + for nlgraph in nlgraphs.iter_mut() { + s.spawn(move |_| { + analyze_nlgraph(nlgraph).unwrap(); + }); + } + }); + + Ok(()) +} +pub fn analyze_nlgraph(nlgraph: &mut NLGraph) -> Result<()> { if !nlgraph.is_weakly_connected() { warn!("Graph is weakly connected"); return Ok(()); diff --git a/src/graph/cluster.rs b/src/graph/cluster.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/graph/distance.rs b/src/graph/distance.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/graph/load.rs b/src/graph/load.rs index 1bfbf70..19812f2 100644 --- a/src/graph/load.rs +++ b/src/graph/load.rs @@ -2,15 +2,27 @@ use anyhow::Result; use log::info; use serde_json::Value; use std::path::Path; +use walkdir::WalkDir; use std::collections::HashMap; use crate::graph::data::{EdgeData, NLGraph, NodeData}; +pub fn load_cygraph_from_directory<P: AsRef<Path>>(directory: P) -> Result<Vec<NLGraph>> { + WalkDir::new(directory) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.path().is_file() && e.path().extension().unwrap() == "json") + .map(|e| load_cygraph_from_file(e.path())) + .collect::<Result<Vec<_>>>() +} + pub fn load_cygraph_from_file<P: AsRef<Path>>(file: P) -> Result<NLGraph> { let reader = std::io::BufReader::new(std::fs::File::open(file.as_ref())?); let data: Value = serde_json::from_reader(reader)?; - load_cygraph_from_json(data) + let result = load_cygraph_from_json(data); + info!("load nlgraph from json {}", file.as_ref().display()); + result } pub fn load_cygraph_from_json(data: Value) -> Result<NLGraph> { @@ -38,9 +50,6 @@ pub fn load_cygraph_from_json(data: Value) -> 
Result<NLGraph> { let _index = graph.add_edge(*source, *target, edge_data); } - info!("Added {} nodes", graph.node_count()); - info!("Added {} edges", graph.edge_count()); - Ok(graph) } diff --git a/src/graph/vis.rs b/src/graph/vis.rs index 8b13789..a7d10bf 100644 --- a/src/graph/vis.rs +++ b/src/graph/vis.rs @@ -1 +1,113 @@ +use anyhow::Result; +use plotters::prelude::*; +// create density plot +pub fn density_plot(data: &[f32]) -> Result<()> { + // Define the dimensions of the plot + const WIDTH: u32 = 800; + const HEIGHT: u32 = 600; + + // Create a drawing backend + let root = BitMapBackend::new("density_plot.png", (WIDTH, HEIGHT)).into_drawing_area(); + root.fill(&WHITE)?; + + let mut chart = ChartBuilder::on(&root) + .caption("Density Plot", ("sans-serif", 40).into_font()) + .margin(10) + .x_label_area_size(30) + .y_label_area_size(30) + .build_cartesian_2d(0.0..1.0, 0.0..10.0)?; // Adjust the range accordingly + + chart.configure_mesh().draw()?; + + // Calculate the density + let min = *data + .iter() + .min_by(|a, b| a.partial_cmp(b).unwrap()) + .unwrap(); + let max = *data + .iter() + .max_by(|a, b| a.partial_cmp(b).unwrap()) + .unwrap(); + let range = max - min; + let step = range / WIDTH as f32; + + let mut densities = vec![0; WIDTH as usize]; + + for &value in data { + let index = ((value - min) / step).floor() as usize; + densities[index] = densities[index].saturating_add(1); + } + + // Normalize the densities + let max_density = *densities.iter().max().unwrap() as f32; + for density in &mut densities { + *density = (*density as f32 / max_density * 10.0).round() as i32; // Scale it to fit the Y axis + } + + // Draw the densities + chart.draw_series(densities.into_iter().enumerate().map(|(x, y)| { + Rectangle::new( + [ + (x as f32 * step + min, 0.0), + (x as f32 * step + min + step, y as f32), + ], + RED.filled(), + ) + }))?; + + // Make sure the data is rendered + root.present()?; + + Ok(()) +} + +// Function to create a histogram +pub fn histogram(data: &[f32]) 
-> Result<(), Box<dyn std::error::Error>> { + const WIDTH: u32 = 800; + const HEIGHT: u32 = 600; + const NUM_BINS: usize = 50; // Adjust the number of bins as needed + + // Create a drawing backend + let root = BitMapBackend::new("histogram.png", (WIDTH, HEIGHT)).into_drawing_area(); + root.fill(&WHITE)?; + + let (min, max) = data + .iter() + .fold((f32::INFINITY, f32::NEG_INFINITY), |(min, max), &val| { + (min.min(val), max.max(val)) + }); + + let bin_size = (max - min) / NUM_BINS as f32; + let mut bins = vec![0; NUM_BINS]; + + for &value in data { + let bin = ((value - min) / bin_size).min(NUM_BINS as f32 - 1.0) as usize; + bins[bin] += 1; + } + + let max_count = *bins.iter().max().unwrap() as f32; + + // Create a chart + let mut chart = ChartBuilder::on(&root) + .caption("Histogram", ("sans-serif", 40).into_font()) + .margin(10) + .x_label_area_size(30) + .y_label_area_size(30) + .build_cartesian_2d(min..max, 0.0..max_count)?; + + chart.configure_mesh().draw()?; + + // Plot the bins + chart.draw_series(Histogram::vertical(&chart).style(RED.filled()).data( + bins.into_iter().enumerate().map(|(i, count)| { + let x0 = min + i as f32 * bin_size; + (x0..x0 + bin_size, count as f32) + }), + ))?; + + // Make sure the data is rendered + root.present()?; + + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index 8403db5..2f92ba9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -160,7 +160,7 @@ fn main() { Some(Commands::Graph(args)) => { info!("'graph' {args:?} "); - graph::analyze_nlgraph(args).unwrap(); + graph::analyze(args).unwrap(); } // If no subcommand was used, it's a normal top level command