Skip to content

Commit

Permalink
perf: Limit the cache size for to_datetime
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa committed Apr 22, 2024
1 parent 0c2783a commit 687302e
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 36 deletions.
77 changes: 48 additions & 29 deletions crates/polars-time/src/chunkedarray/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,13 @@ pub trait StringMethods: AsString {
};
let use_cache = use_cache && string_ca.len() > 50;

let mut convert = CachedFunc::new(|s| {
let naive_time = NaiveTime::parse_from_str(s, fmt).ok()?;
Some(time_to_time64ns(&naive_time))
});
let mut convert = CachedFunc::new(
|s| {
let naive_time = NaiveTime::parse_from_str(s, fmt).ok()?;
Some(time_to_time64ns(&naive_time))
},
(string_ca.len() as f64).sqrt() as usize,
);
let ca = string_ca.apply_generic(|opt_s| convert.eval(opt_s?, use_cache));
Ok(ca.with_name(string_ca.name()).into())
}
Expand Down Expand Up @@ -237,21 +240,27 @@ pub trait StringMethods: AsString {
// We can use the fast parser.
let ca = if let Some(fmt_len) = strptime::fmt_len(fmt.as_bytes()) {
let mut strptime_cache = StrpTimeState::default();
let mut convert = CachedFunc::new(|s: &str| {
// SAFETY: fmt_len is correct, it was computed with this `fmt` str.
match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) } {
// Fallback to chrono.
None => NaiveDate::parse_from_str(s, &fmt).ok(),
Some(ndt) => Some(ndt.date()),
}
.map(naive_date_to_date)
});
let mut convert = CachedFunc::new(
|s: &str| {
// SAFETY: fmt_len is correct, it was computed with this `fmt` str.
match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) } {
// Fallback to chrono.
None => NaiveDate::parse_from_str(s, &fmt).ok(),
Some(ndt) => Some(ndt.date()),
}
.map(naive_date_to_date)
},
(string_ca.len() as f64).sqrt() as usize,
);
string_ca.apply_generic(|val| convert.eval(val?, use_cache))
} else {
let mut convert = CachedFunc::new(|s| {
let naive_date = NaiveDate::parse_from_str(s, &fmt).ok()?;
Some(naive_date_to_date(naive_date))
});
let mut convert = CachedFunc::new(
|s| {
let naive_date = NaiveDate::parse_from_str(s, &fmt).ok()?;
Some(naive_date_to_date(naive_date))
},
(string_ca.len() as f64).sqrt() as usize,
);
string_ca.apply_generic(|val| convert.eval(val?, use_cache))
};

Expand Down Expand Up @@ -286,10 +295,13 @@ pub trait StringMethods: AsString {
if tz_aware {
#[cfg(feature = "timezones")]
{
let mut convert = CachedFunc::new(|s: &str| {
let dt = DateTime::parse_from_str(s, &fmt).ok()?;
Some(func(dt.naive_utc()))
});
let mut convert = CachedFunc::new(
|s: &str| {
let dt = DateTime::parse_from_str(s, &fmt).ok()?;
Some(func(dt.naive_utc()))
},
(string_ca.len() as f64).sqrt() as usize,
);
Ok(string_ca
.apply_generic(|opt_s| convert.eval(opt_s?, use_cache))
.with_name(string_ca.name())
Expand All @@ -308,16 +320,23 @@ pub trait StringMethods: AsString {
// We can use the fast parser.
let ca = if let Some(fmt_len) = self::strptime::fmt_len(fmt.as_bytes()) {
let mut strptime_cache = StrpTimeState::default();
let mut convert = CachedFunc::new(|s: &str| {
// SAFETY: fmt_len is correct, it was computed with this `fmt` str.
match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) } {
None => transform(s, &fmt),
Some(ndt) => Some(func(ndt)),
}
});
let mut convert = CachedFunc::new(
|s: &str| {
// SAFETY: fmt_len is correct, it was computed with this `fmt` str.
match unsafe { strptime_cache.parse(s.as_bytes(), fmt.as_bytes(), fmt_len) }
{
None => transform(s, &fmt),
Some(ndt) => Some(func(ndt)),
}
},
(string_ca.len() as f64).sqrt() as usize,
);
string_ca.apply_generic(|opt_s| convert.eval(opt_s?, use_cache))
} else {
let mut convert = CachedFunc::new(|s| transform(s, &fmt));
let mut convert = CachedFunc::new(
|s| transform(s, &fmt),
(string_ca.len() as f64).sqrt() as usize,
);
string_ca.apply_generic(|opt_s| convert.eval(opt_s?, use_cache))
};
let dt = ca.with_name(string_ca.name()).into_datetime(tu, None);
Expand Down
11 changes: 4 additions & 7 deletions crates/polars-utils/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@ use ahash::RandomState;
use bytemuck::allocation::zeroed_vec;
use bytemuck::Zeroable;

use crate::aliases::PlHashMap;

pub struct CachedFunc<T, R, F> {
func: F,
cache: PlHashMap<T, R>,
cache: FastFixedCache<T, R>,
}

impl<T, R, F> CachedFunc<T, R, F>
Expand All @@ -20,19 +18,18 @@ where
T: std::hash::Hash + Eq + Clone,
R: Copy,
{
pub fn new(func: F) -> Self {
pub fn new(func: F, size: usize) -> Self {
Self {
func,
cache: PlHashMap::with_capacity_and_hasher(0, Default::default()),
cache: FastFixedCache::new(size),
}
}

pub fn eval(&mut self, x: T, use_cache: bool) -> R {
if use_cache {
*self
.cache
.entry(x)
.or_insert_with_key(|xr| (self.func)(xr.clone()))
.get_or_insert_with(&x, |xr| (self.func)(xr.clone()))
} else {
(self.func)(x)
}
Expand Down

0 comments on commit 687302e

Please sign in to comment.