generated from JadeCara/rust_setup
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Jade Wibbels
authored and
Jade Wibbels
committed
Jan 3, 2025
1 parent
1465985
commit 4eadd28
Showing
3 changed files
with
124 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Manifest for the parallel Wikipedia page fetcher.
[package]
name = "webcrawl-rayon"
# Keep this on a line starting with `version = `: the Makefile's `bump`
# target rewrites it with a line-anchored sed expression.
version = "0.1.0"
edition = "2021"

[dependencies]
# Data-parallel iterators (`par_iter`) used to process pages concurrently.
rayon = "1.7.0"
# Wikipedia API client used to look up page titles and content.
wikipedia = "0.3.4"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
SHELL := /bin/bash

# None of these targets produce a file of the same name, so declare them all
# phony; otherwise a stray file or directory called e.g. `test` or `build`
# would make the corresponding target look up to date.
.PHONY: help clean build run test lint format release all bump

# Lists every target that carries a `## description` suffix.
help:
	@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'

clean: ## Clean the project using cargo
	cargo clean

build: ## Build the project using cargo
	cargo build

run: ## Run the project using cargo
	cargo run

test: ## Run the tests using cargo
	cargo test

lint: ## Run the linter using cargo
	@rustup component add clippy 2> /dev/null
	cargo clippy

format: ## Format the code using cargo
	@rustup component add rustfmt 2> /dev/null
	cargo fmt

release: ## Build the project in release mode using cargo
	cargo build --release

all: format lint test run ## Format, lint, test and run the project

# Prompts for a new version and writes it to the `version = ...` line of
# Cargo.toml. `sed -i.bak` (suffix attached to -i) is accepted by both GNU and
# BSD sed, so the backup file name is known and can be removed reliably.
bump: ## Bump the version of the project
	@echo "Current version is $(shell cargo pkgid | cut -d# -f2)"
	@read -r -p "Enter the new version: " version; \
	if [ -z "$$version" ]; then echo "No version entered; aborting." >&2; exit 1; fi; \
	sed -i.bak -E "s/^version = .*/version = \"$$version\"/" Cargo.toml && rm -f Cargo.toml.bak
	@echo "Version bumped to $$(cargo pkgid | cut -d# -f2)"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/* | ||
* Uses wikipedia crate to fetch pages | ||
* Processes page content | ||
* Collects timing metrics | ||
* Concurrent page processing | ||
* Shows crate usage and concurrency in Rust | ||
*/ | ||
|
||
use rayon::prelude::*; | ||
use wikipedia::http::default::Client; | ||
use wikipedia::Page; | ||
use wikipedia::Wikipedia; | ||
|
||
/// Title and text content extracted from a single Wikipedia page.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ProcessedPage {
    /// Title of the page.
    title: String,
    /// Text content of the page.
    data: String,
}
|
||
/// Titles of the Wikipedia pages to look up, in the order they are reported.
const PAGES: [&str; 9] = [
    // Players active at the time of writing.
    "Giannis Antetokounmpo",
    "James Harden",
    "Russell Westbrook",
    "Stephen Curry",
    "Kevin Durant",
    "LeBron James",
    // Retired players.
    "Kobe Bryant",
    "Michael Jordan",
    "Shaquille O'Neal",
];
|
||
fn process_page(page: &Page<Client>) -> ProcessedPage { | ||
let title = page.get_title().unwrap(); | ||
let content = page.get_content().unwrap(); | ||
ProcessedPage { | ||
title, | ||
data: content, | ||
} | ||
} | ||
|
||
//times how long it takes to process the pages and total time | ||
fn main() { | ||
//start timer | ||
let start = std::time::Instant::now(); | ||
let wikipedia = Wikipedia::<Client>::default(); | ||
let pages: Vec<_> = PAGES | ||
.par_iter() //parallel iterator | ||
.map(|&p| wikipedia.page_from_title(p.to_string())) | ||
.collect(); | ||
|
||
let processed_pages: Vec<ProcessedPage> = pages.par_iter().map(process_page).collect(); | ||
for page in processed_pages { | ||
//time how long it takes to process each page | ||
let start_page = std::time::Instant::now(); | ||
|
||
println!("Title: {}", page.title.as_str()); | ||
//grab first sentence of the page | ||
let first_sentence = page.data.split('.').next().unwrap(); | ||
println!("First sentence: {}", first_sentence); | ||
//count the number of words in the page | ||
let word_count = page.data.split_whitespace().count(); | ||
println!("Word count: {}", word_count); | ||
//prints time it took to process each page | ||
println!("Page time: {:?}", start_page.elapsed()); | ||
} | ||
//descriptive statistics of: total time, average time per page, and total number of pages, as well as the number of threads used | ||
println!("Total time: {:?}", start.elapsed()); | ||
println!( | ||
"Average time per page: {:?}", | ||
start.elapsed() / PAGES.len() as u32 | ||
); | ||
println!("Total number of pages: {}", PAGES.len()); | ||
println!("Number of threads: {}", rayon::current_num_threads()); | ||
} |