Skip to content

Commit

Permalink
perf: Optmize many parse_* fns
Browse files Browse the repository at this point in the history
Some unintuitive and some hard to measure differences, but overall significant perf gains. Aggregate
perf is back to being as good as the previous emlop release (and better than qlop), despite the new
features.
  • Loading branch information
vincentdephily committed Feb 5, 2025
1 parent 03b6810 commit 3b1238d
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 50 deletions.
104 changes: 54 additions & 50 deletions src/parse/history.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::{datetime::fmt_utctime, Show, TimeBound};
use anyhow::{bail, ensure, Context, Error};
use flate2::read::GzDecoder;
use log::*;
use memchr::memchr;
use memchr::{memchr, memrchr, memrchr2};
use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
use std::{fs::File,
io::{BufRead, BufReader},
Expand Down Expand Up @@ -177,7 +177,7 @@ pub fn get_hist(file: &str,
}
}
},
// Could be invalid UTF8, system read error...
// System read error...
Err(e) => warn!("{logfile}:{curline}: {e}"),
}
line.clear();
Expand Down Expand Up @@ -296,85 +296,94 @@ fn parse_version(atom: &str, filter: &FilterStr) -> Option<usize> {
}
}

/// Parse and filter timestamp
// TODO MSRV 1.80: from_utf8(s.trim_ascii_start())
/// Parse and filter timestamp, trim line
fn parse_ts(line: &[u8], min: i64, max: i64) -> Option<(i64, &[u8])> {
use atoi::FromRadix10;
match i64::from_radix_10(line) {
(ts, n) if n != 0 && ts >= min && ts <= max => {
let mut line = &line[(n + 1)..];
while let Some(32) = line.first() {
line = &line[1..];
while let [b' ', rest @ ..] = line {
line = rest;
}
if let [rest @ .., b'\n'] = line {
line = rest;
}
Some((ts, line))
},
_ => None,
}
}

/// *** emerge --update --ask --deep --reinstall=changed-use --regex-search-auto=y --verbose system
fn parse_runstart(enabled: bool, ts: i64, line: &[u8]) -> Option<Hist> {
if !enabled || !line.starts_with(b"*** emerge") {
if !enabled || !line.starts_with(b"*** emer") {
return None;
}
Some(Hist::RunStart { ts, args: from_utf8(&line[11..]).ok()?.trim().to_owned() })
Some(Hist::RunStart { ts, args: from_utf8(&line[11..]).ok()?.to_owned() })
}

/// >>> emerge (1 of 1) www-client/falkon-24.08.3 to /
fn parse_mergestart(enabled: bool, ts: i64, line: &[u8], filter: &FilterStr) -> Option<Hist> {
if !enabled || !line.starts_with(b">>> emer") {
return None;
}
let mut tokens = from_utf8(line).ok()?.split_ascii_whitespace();
let t6 = tokens.nth(5)?;
let pos = parse_version(t6, filter)?;
Some(Hist::MergeStart { ts, key: t6.to_owned(), pos })
let mut tokens = line.split(|c| *c == b' ');
let atom = from_utf8(tokens.nth(5)?).ok()?;
let pos = parse_version(atom, filter)?;
Some(Hist::MergeStart { ts, key: atom.to_owned(), pos })
}

/// === (1 of 1) Merging Binary (www-client/falkon-24.08.3::/var/cache/binpkgs/www-client/falkon-24.08.3.gpkg.tar)
fn parse_mergebin(enabled: bool, ts: i64, line: &[u8], filter: &FilterStr) -> Option<Hist> {
if !enabled || !line.starts_with(b"=== (") {
return None;
}
let line = from_utf8(line).ok()?;
let p1 = line.find(')')?;
if !&line[(p1 + 2)..].starts_with("Merging Bin") {
let p1 = memchr(b')', line)?;
if !&line[(p1 + 2)..].starts_with(b"Merging Bin") {
return None;
}
let p2 = line[p1..].find('(')? + p1 + 1;
let p3 = line[p2..].find("::")? + p2;
let pos = parse_version(&line[p2..p3], filter)?;
Some(Hist::MergeBin { ts, key: line[p2..p3].to_owned(), pos })
let p2 = memchr(b'(', &line[p1..])? + p1 + 1;
let p3 = memchr(b':', &line[p2..])? + p2;
let atom = from_utf8(&line[p2..p3]).ok()?;
let pos = parse_version(atom, filter)?;
Some(Hist::MergeBin { ts, key: atom.to_owned(), pos })
}

/// ::: completed emerge (1 of 1) www-client/falkon-24.08.3 to /
fn parse_mergestop(enabled: bool, ts: i64, line: &[u8], filter: &FilterStr) -> Option<Hist> {
if !enabled || !line.starts_with(b"::: comp") {
return None;
}
let mut tokens = from_utf8(line).ok()?.split_ascii_whitespace();
let t7 = tokens.nth(6)?;
let pos = parse_version(t7, filter)?;
Some(Hist::MergeStop { ts, key: t7.to_owned(), pos })
let mut tokens = line.split(|c| *c == b' ');
let atom = from_utf8(tokens.nth(6)?).ok()?;
let pos = parse_version(atom, filter)?;
Some(Hist::MergeStop { ts, key: atom.to_owned(), pos })
}

/// === Unmerging... (app-portage/getuto-1.13)
fn parse_unmergestart(enabled: bool, ts: i64, line: &[u8], filter: &FilterStr) -> Option<Hist> {
if !enabled || !line.starts_with(b"=== Unmerging...") {
if !enabled || !line.starts_with(b"=== Unmer") {
return None;
}
let mut tokens = from_utf8(line).ok()?.split_ascii_whitespace();
let t3 = tokens.nth(2)?;
let ebuild_version = &t3[1..t3.len() - 1];
let pos = parse_version(ebuild_version, filter)?;
Some(Hist::UnmergeStart { ts, key: ebuild_version.to_owned(), pos })
let p1 = memchr(b'(', line)? + 1;
let atom = from_utf8(&line[p1..line.len() - 1]).ok()?;
let pos = parse_version(atom, filter)?;
Some(Hist::UnmergeStart { ts, key: atom.to_owned(), pos })
}

/// >>> unmerge success: www-client/falkon-24.08.3
fn parse_unmergestop(enabled: bool, ts: i64, line: &[u8], filter: &FilterStr) -> Option<Hist> {
if !enabled || !line.starts_with(b">>> unmerge success") {
if !enabled || !line.starts_with(b">>> unmerge succ") {
return None;
}
let mut tokens = from_utf8(line).ok()?.split_ascii_whitespace();
let t3 = tokens.nth(3)?;
let pos = parse_version(t3, filter)?;
Some(Hist::UnmergeStop { ts, key: t3.to_owned(), pos })
let p1 = memrchr(b' ', line)? + 1;
let atom = from_utf8(&line[p1..]).ok()?;
let pos = parse_version(atom, filter)?;
Some(Hist::UnmergeStop { ts, key: atom.to_owned(), pos })
}

/// >>> Syncing repository 'gentoo' into '/usr/portage'...
/// >>> Starting rsync with rsync://91.186.30.235/gentoo-portage
fn parse_syncstart(enabled: bool, ts: i64, line: &[u8]) -> Option<Hist> {
// Old portage logs 'Starting rsync with <url>', new portage logs 'Syncing repository <name>',
// and intermediate versions log both. This makes it hard to properly match a start repo string
Expand All @@ -390,25 +399,20 @@ fn parse_syncstart(enabled: bool, ts: i64, line: &[u8]) -> Option<Hist> {
None
}
}

/// === Sync completed with rsync://209.177.148.226/gentoo-portage
/// === Sync completed for gentoo
fn parse_syncstop(enabled: bool, ts: i64, line: &[u8], filter: &FilterStr) -> Option<Hist> {
// Old portage logs 'completed with <url>', new portage logs 'completed for <name>'
if !enabled || !line.starts_with(b"=== Sync completed") {
if !enabled || !line.starts_with(b"=== Sync comp") {
return None;
}
let line = from_utf8(line).ok()?;
let repo = match line.rsplit_once(['/', ' ']) {
Some((_, r)) => String::from(r.trim()),
_ => {
warn!("Can't find sync repo name in {ts} {line}");
String::from("unknown")
},
};
if !filter.match_str(&repo) {
return None;
}
Some(Hist::SyncStop { ts, repo })
let pos = memrchr2(b' ', b'/', line)? + 1;
let repo = from_utf8(&line[pos..]).ok()?;
filter.match_str(repo).then_some(Hist::SyncStop { ts, repo: repo.to_owned() })
}


#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -770,12 +774,12 @@ mod bench {
($n:ident, $f:expr) => {
#[bench]
fn $n(b: &mut test::Bencher) {
let filter = FilterStr::try_new(&vec![], true).unwrap();
let f = FilterStr::True;
let lines = &*EMERGE_LINES;
b.iter(move || {
let mut found = 0;
for (l1, l2) in lines {
found += $f(&filter, l1, l2).is_some() as u32;
found += $f(&f, l1, l2).is_some() as u32;
}
assert!(found > 2, "Only {found} matches for {}", stringify!($f));
});
Expand All @@ -790,5 +794,5 @@ mod bench {
bench_parselines!(parse_unmergestart_, |f, _, s| parse_unmergestart(true, 1, s, f));
bench_parselines!(parse_unmergestop_, |f, _, s| parse_unmergestop(true, 1, s, f));
bench_parselines!(parse_syncstart_, |_, _, s| parse_syncstart(true, 1, s));
bench_parselines!(parse_stncstop_, |f, _, s| parse_syncstop(true, 1, s, f));
bench_parselines!(parse_syncstop_, |f, _, s| parse_syncstop(true, 1, s, f));
}
3 changes: 3 additions & 0 deletions tests/emerge.badtimestamp.log
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
1327867709: Started emerge on: Jan 29, 2012 20:08:29
1327867709: *** emerge --newuse --deep --ask --update --verbose world
1327867778l: >>> emerge (1 of 3) media-libs/jpeg-8c-r1 to /
1327867778
1327867778:
1327867804: ::: completed emerge (1 of 3) media-libs/jpeg-8c-r1 to /
1327867804: >>> emerge (2 of 3) dev-libs/libical-0.48 to /
1327867830: ::: completed emerge (2 of 3) dev-libs/libical-0.48 to /
Expand All @@ -9,3 +11,4 @@
1327871047: *** Finished. Cleaning up...
1327871049: *** exiting successfully.
1327871057: *** terminating.
14341543

0 comments on commit 3b1238d

Please sign in to comment.