Skip to content

Commit

Permalink
Merge pull request #69 from quambene/overwrite-existing-bookmark-cache
Browse files Browse the repository at this point in the history
Implement fixes and improvements
  • Loading branch information
quambene authored Jan 14, 2024
2 parents 0a9db70 + 68880d8 commit 2a2104b
Show file tree
Hide file tree
Showing 23 changed files with 199 additions and 140 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/rust-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- name: cargo test
run: cargo test --locked
- name: cargo test --lib
run: cargo test --lib --locked
integration-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- name: cargo test --test '*' --features integration-test
run: cargo test --locked --test '*' --features integration-test
- name: cargo test --test '*'
run: cargo test --test '*' --locked
os-test:
runs-on: ${{ matrix.os }}
name: os-test / ${{ matrix.os }}
Expand All @@ -67,10 +67,10 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- name: cargo test
run: cargo test --locked
- name: cargo test --test '*' --features integration-test
run: cargo test --locked --test '*' --features integration-test
- name: cargo test --lib
run: cargo test --lib --locked
- name: cargo test --test '*'
run: cargo test --test '*' --locked
doc-test:
runs-on: ubuntu-latest
steps:
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@
- added
- Add `action` to `TargetBookmark`
- Add benchmarks for fetching
- Take ignored urls into account in `bogrep import`
- changed
- Update to rust 1.75
- Fix duplicate cache files for `bogrep fetch --urls`
- Fix report of processed bookmarks
- removed
- Remove `integration-test` feature

### v0.6.1

Expand Down
3 changes: 0 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ categories = ["command-line-utilities", "text-processing"]
readme = "README.md"
license = "Apache-2.0"

[features]
integration-test = []

[[bench]]
name = "fetch"
harness = false
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,12 @@ You can configure the configuration path via the environment variable
## Testing

``` bash
# Run unit tests
# Run unit tests and integration tests
cargo test

# Run integration tests
cargo test --test '*' --features integration-test
# Run unit tests
cargo test --lib

# Run unit and integration tests
cargo test --features integration-test
# Run integration tests
cargo test --test '*'
```
14 changes: 7 additions & 7 deletions benches/fetch.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use bogrep::{
cmd, errors::BogrepError, html, Cache, CacheMode, Caching, Fetch, MockClient, TargetBookmark,
TargetBookmarks,
cmd, errors::BogrepError, html, Action, Cache, CacheMode, Caching, Fetch, MockClient,
TargetBookmark, TargetBookmarks,
};
use chrono::Utc;
use criterion::{criterion_group, criterion_main, Criterion};
Expand Down Expand Up @@ -68,15 +68,15 @@ async fn fetch_concurrently(max_concurrent_requests: usize) {
None,
HashSet::new(),
HashSet::new(),
bogrep::Action::Fetch,
bogrep::Action::FetchAndReplace,
),
);
}

let mut bookmarks = TargetBookmarks::new(bookmarks);
assert_eq!(bookmarks.len(), 10000);

cmd::fetch_and_cache_bookmarks(
cmd::process_bookmarks(
&client,
&cache,
bookmarks.values_mut().collect(),
Expand Down Expand Up @@ -110,7 +110,7 @@ async fn fetch_in_parallel(max_parallel_requests: usize) {
None,
HashSet::new(),
HashSet::new(),
bogrep::Action::Fetch,
Action::FetchAndReplace,
),
);
}
Expand All @@ -124,12 +124,12 @@ async fn fetch_in_parallel(max_parallel_requests: usize) {
let client = Arc::new(client);
let cache = Arc::new(cache);

fetch_and_cache_bookmarks_in_parallel(client, cache, &bookmarks, max_parallel_requests, true)
process_bookmarks_in_parallel(client, cache, &bookmarks, max_parallel_requests, true)
.await
.unwrap();
}

pub async fn fetch_and_cache_bookmarks_in_parallel(
pub async fn process_bookmarks_in_parallel(
client: Arc<impl Fetch + Send + Sync + 'static>,
cache: Arc<impl Caching + Send + Sync + 'static>,
bookmarks: &[Arc<Mutex<TargetBookmark>>],
Expand Down
4 changes: 2 additions & 2 deletions src/bookmarks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ use uuid::Uuid;
pub enum Action {
/// Fetch and cache the bookmark, even if it is cached already. The cached
/// content will be updated with the most recent version of the website.
Fetch,
FetchAndReplace,
/// Fetch and cache bookmark if it is not cached yet.
Add,
FetchAndAdd,
/// Remove bookmark from cache.
Remove,
/// No actions to be performed.
Expand Down
86 changes: 81 additions & 5 deletions src/bookmarks/target_bookmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ impl TargetBookmark {
action,
}
}

pub fn set_action(&mut self, action: Action) {
self.action = action;
}

pub fn set_source(&mut self, source: SourceType) {
self.sources.insert(source);
}
}

impl From<JsonBookmark> for TargetBookmark {
Expand Down Expand Up @@ -141,8 +149,13 @@ impl TargetBookmarks {
let url = entry.key().clone();
let target_bookmark = entry.into_mut();
debug!("Overwrite duplicate target bookmark: {}", url);
// TODO: use existing bookmark id and bookmark urls
*target_bookmark = bookmark;

// We are keeping the existing id and url, but overwriting all other fields.
target_bookmark.last_imported = bookmark.last_imported;
target_bookmark.last_cached = bookmark.last_cached;
target_bookmark.sources = bookmark.sources;
target_bookmark.cache_modes = bookmark.cache_modes;
target_bookmark.action = bookmark.action;
}
Entry::Vacant(entry) => {
entry.insert(bookmark);
Expand Down Expand Up @@ -172,6 +185,14 @@ impl TargetBookmarks {
}
}

pub fn ignore_urls(&mut self, ignored_urls: &[String]) {
for url in ignored_urls {
if let Some(target_bookmark) = self.get_mut(url) {
target_bookmark.set_action(Action::Remove)
}
}
}

pub fn filter_to_add<'a>(
&self,
source_bookmarks: &'a SourceBookmarks,
Expand Down Expand Up @@ -214,7 +235,7 @@ impl TargetBookmarks {
None,
bookmark.sources.to_owned(),
HashSet::new(),
Action::Add,
Action::FetchAndAdd,
);
self.insert(target_bookmark);
}
Expand Down Expand Up @@ -368,12 +389,67 @@ mod tests {
let res = target_bookmarks.update(&source_bookmarks);
assert!(res.is_ok());
assert_eq!(target_bookmarks.get(url1).unwrap().action, Action::None);
assert_eq!(target_bookmarks.get(url2).unwrap().action, Action::Add);
assert_eq!(
target_bookmarks.get(url2).unwrap().action,
Action::FetchAndAdd
);
assert_eq!(target_bookmarks.get(url3).unwrap().action, Action::None);
assert_eq!(target_bookmarks.get(url4).unwrap().action, Action::Add);
assert_eq!(
target_bookmarks.get(url4).unwrap().action,
Action::FetchAndAdd
);
assert_eq!(target_bookmarks.get(url5).unwrap().action, Action::Remove);
}

#[test]
fn test_ignore_urls() {
let now = Utc::now();
let url1 = "https://url1.com";
let url2 = "https://url2.com";
let url3 = "https://url3.com";
let ignored_urls = vec![url1.to_owned(), url3.to_owned()];
let mut target_bookmarks = TargetBookmarks::new(HashMap::from_iter([
(
url1.to_owned(),
TargetBookmark::new(
url1,
now,
None,
HashSet::new(),
HashSet::new(),
Action::None,
),
),
(
url2.to_owned(),
TargetBookmark::new(
url2,
now,
None,
HashSet::new(),
HashSet::new(),
Action::None,
),
),
(
url3.to_owned(),
TargetBookmark::new(
url3,
now,
None,
HashSet::new(),
HashSet::new(),
Action::None,
),
),
]));

target_bookmarks.ignore_urls(&ignored_urls);
assert!(target_bookmarks.get(url1).unwrap().action == Action::Remove);
assert!(target_bookmarks.get(url2).unwrap().action == Action::None);
assert!(target_bookmarks.get(url3).unwrap().action == Action::Remove);
}

#[test]
fn test_read_target_bookmarks() {
let expected_bookmarks = EXPECTED_BOOKMARKS.as_bytes().to_vec();
Expand Down
1 change: 0 additions & 1 deletion src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ pub trait Caching {
fn exists(&self, bookmark: &TargetBookmark) -> bool;

/// Open the cached file for a bookmark.
// TODO: return `Result<Option<impl Read>, anyhow::Error>` (see <https://github.com/rust-lang/rust/issues/91611>).
fn open(&self, bookmark: &TargetBookmark) -> Result<Option<impl Read>, BogrepError>;

/// Get the content of a bookmark from cache.
Expand Down
Loading

0 comments on commit 2a2104b

Please sign in to comment.