diff --git a/Cargo.lock b/Cargo.lock index 405710cb8b9..51757a12966 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1189,6 +1189,15 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + [[package]] name = "heapless" version = "0.7.7" @@ -1973,6 +1982,7 @@ dependencies = [ "icu_testdata", "itertools", "litemap", + "lru", "serde", "serde-aux", "tinystr 0.7.0", @@ -2010,7 +2020,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.11.2", "serde", ] @@ -2181,6 +2191,15 @@ dependencies = [ "syn", ] +[[package]] +name = "lru" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6e8aaa3f231bb4bd57b84b2d5dc3ae7f350265df8aa96492e0bc394a1571909" +dependencies = [ + "hashbrown 0.12.3", +] + [[package]] name = "mach" version = "0.3.2" @@ -2398,7 +2417,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40bec70ba014595f99f7aa110b84331ffe1ee9aece7fe6f387cc7e3ecda4d456" dependencies = [ "crc32fast", - "hashbrown", + "hashbrown 0.11.2", "indexmap", "memchr", ] @@ -2869,7 +2888,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e419b2e30d088b21c4bf3072561535305df8066e89937ad05fc205b99874c23c" dependencies = [ "bytecheck", - "hashbrown", + "hashbrown 0.11.2", "ptr_meta", "rend", "rkyv_derive", diff --git a/docs/tutorials/data_provider.md b/docs/tutorials/data_provider.md index cc7b2c72cb4..001b899625b 100644 --- a/docs/tutorials/data_provider.md +++ b/docs/tutorials/data_provider.md @@ -61,3 +61,169 @@ impl AdditiveIdentity { } } ``` + +## Caching Data Provider + +ICU4X has no internal caches because there is no 
one-size-fits-all solution. It is easy for clients to implement their own cache for ICU4X, and although this is not generally required or recommended, it may be beneficial when latency is of utmost importance and, for example, a less-efficient data provider such as JSON is being used. + +The following example illustrates an LRU cache on top of a BufferProvider that saves deserialized data payloads as type-erased objects and then checks for a cache hit before calling the inner provider. + +```rust +use icu_provider::hello_world::HelloWorldFormatter; +use icu_provider::prelude::*; +use icu::locid::locale; +use lru::LruCache; +use std::borrow::{Borrow, Cow}; +use std::convert::TryInto; +use std::sync::Mutex; +use yoke::trait_hack::YokeTraitHack; +use yoke::Yokeable; +use zerofrom::ZeroFrom; + +#[derive(Debug, PartialEq, Eq, Hash)] +struct CacheKeyWrap(CacheKey<'static>); + +#[derive(Debug, PartialEq, Eq, Hash)] +struct CacheKey<'a>(DataKey, Cow<'a, DataLocale>); + +pub struct LruDataCache

{ + cache: Mutex>, + provider: P, +} + +// This impl enables a borrowed DataLocale to be used during cache retrieval. +impl<'a> Borrow> for lru::KeyRef { + fn borrow(&self) -> &CacheKey<'a> { + &Borrow::::borrow(self).0 + } +} + +impl DataProvider for LruDataCache

+where + M: KeyedDataMarker + 'static, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: icu_provider::MaybeSendSync, + for<'a> YokeTraitHack<>::Output>: Clone, + P: DataProvider, +{ + fn load(&self, req: DataRequest) -> Result, DataError> { + { + // First lock: cache retrieval + let mut cache = self.cache.lock().unwrap(); + let borrowed_cache_key = CacheKey(M::KEY, Cow::Borrowed(req.locale)); + if let Some(any_res) = cache.get(&borrowed_cache_key) { + // Note: Cloning a DataPayload is usually cheap, and it is necessary in order to + // convert the short-lived cache object into one we can return. + return any_res.downcast_cloned(); + } + } + // Release the lock to invoke the inner provider + let response = self.provider.load(req)?; + let owned_cache_key = CacheKeyWrap(CacheKey(M::KEY, Cow::Owned(req.locale.clone()))); + // Second lock: cache storage + self.cache.lock() + .unwrap() + .get_or_insert(owned_cache_key, || response.wrap_into_any_response()) + .downcast_cloned() + } +} + +// Usage example: +let provider = icu_testdata::buffer(); +let lru_capacity = 100usize.try_into().unwrap(); +let provider = LruDataCache { + cache: Mutex::new(LruCache::new(lru_capacity)), + provider: provider.as_deserializing(), +}; + +// The cache starts empty: +assert_eq!(provider.cache.lock().unwrap().len(), 0); + +assert_eq!( + "こんにちは世界", + // Note: It is necessary to use `try_new_unstable` with LruDataCache. 
+ HelloWorldFormatter::try_new_unstable( + &provider, + &locale!("ja").into() + ) + .unwrap() + .format_to_string() +); + +// One item in the cache: +assert_eq!(provider.cache.lock().unwrap().len(), 1); + +assert_eq!( + "ওহে বিশ্ব", + HelloWorldFormatter::try_new_unstable( + &provider, + &locale!("bn").into() + ) + .unwrap() + .format_to_string() +); + +// Two items in the cache: +assert_eq!(provider.cache.lock().unwrap().len(), 2); + +assert_eq!( + "こんにちは世界", + HelloWorldFormatter::try_new_unstable( + &provider, + &locale!("ja").into() + ) + .unwrap() + .format_to_string() +); + +// Still only two items in the cache, since we re-requested "ja" data: +assert_eq!(provider.cache.lock().unwrap().len(), 2); +``` + +## Overwriting Specific Data Items + +ICU4X's explicit data pipeline allows for specific data entries to be overwritten in order to customize the output or comply with policy. + +The following example illustrates how to overwrite the grouping separator in the decimal symbols data for a region. + +```rust +use icu::decimal::FixedDecimalFormatter; +use icu_provider::prelude::*; +use icu::locid::locale; +use icu::locid::subtags_region as region; +use std::borrow::Cow; +use tinystr::tinystr; + +pub struct CustomDecimalSymbolsProvider

(P); + +impl

AnyProvider for CustomDecimalSymbolsProvider

+where + P: AnyProvider +{ + fn load_any(&self, key: DataKey, req: DataRequest) -> Result { + use icu::decimal::provider::DecimalSymbolsV1Marker; + let mut any_res = self.0.load_any(key, req)?; + if key == DecimalSymbolsV1Marker::KEY && req.locale.region() == Some(region!("CH")) { + let mut res: DataResponse = any_res.downcast()?; + if let Some(payload) = &mut res.payload.as_mut() { + payload.with_mut(|data| { + // Change the grouping separator for all Swiss locales to '🐮' + data.grouping_separator = Cow::Borrowed("🐮"); + }); + } + any_res = res.wrap_into_any_response(); + } + Ok(any_res) + } +} + +let provider = CustomDecimalSymbolsProvider(icu_testdata::any()); +let formatter = FixedDecimalFormatter::try_new_with_any_provider( + &provider, + &locale!("de-CH").into(), + Default::default(), +) +.unwrap(); + +assert_eq!(formatter.format_to_string(&100007i64.into()), "100🐮007"); +``` diff --git a/experimental/tutorials/Cargo.toml b/experimental/tutorials/Cargo.toml index 6da4b88dc20..964f067477f 100644 --- a/experimental/tutorials/Cargo.toml +++ b/experimental/tutorials/Cargo.toml @@ -35,7 +35,7 @@ icu = { version = "1.0.0", path = "../../components/icu", default-features = fal icu_provider = { version = "1.0.0", path = "../../provider/core", default-features = false } icu_provider_fs = { version = "1.0.0", path = "../../provider/fs" } icu_provider_blob = { version = "1.0.0", path = "../../provider/blob" } -icu_testdata = { version = "1.0.0", path = "../../provider/testdata" } +icu_testdata = { version = "1.0.0", path = "../../provider/testdata", features = ["buffer"] } zerofrom = { version = "0.1.0", path = "../../utils/zerofrom" } serde = { version = "1.0", features = ["derive", "alloc", "std"] } icu_datagen = { version = "1.0.0", path = "../../provider/datagen" } @@ -47,3 +47,4 @@ databake = { version = "0.1.0", path = "../../utils/databake", features = ["deri serde-aux = "2.1.1" itertools = "0.10" embed-doc-image = "0.1" +lru = "0.8.1" diff --git 
a/provider/core/src/any.rs b/provider/core/src/any.rs index 1c7a60435e5..49fdefc675a 100644 --- a/provider/core/src/any.rs +++ b/provider/core/src/any.rs @@ -115,6 +115,19 @@ impl AnyPayload { } } + /// Clones and then transforms a type-erased `AnyPayload` into a concrete `DataPayload`. + pub fn downcast_cloned(&self) -> Result, DataError> + where + M: DataMarker + 'static, + // For the StructRef case: + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + // For the PayloadRc case: + M::Yokeable: MaybeSendSync, + for<'a> YokeTraitHack<>::Output>: Clone, + { + self.clone().downcast() + } + /// Creates an `AnyPayload` from a static reference to a data struct. /// /// # Examples @@ -230,7 +243,7 @@ impl From for DataResponse { } impl AnyResponse { - /// Transforms a type-erased `DataResponse` into a concrete `DataResponse`. + /// Transforms a type-erased `AnyResponse` into a concrete `DataResponse`. #[inline] pub fn downcast(self) -> Result, DataError> where @@ -244,6 +257,39 @@ impl AnyResponse { payload: self.payload.map(|p| p.downcast()).transpose()?, }) } + + /// Clones and then transforms a type-erased `AnyResponse` into a concrete `DataResponse`. + pub fn downcast_cloned(&self) -> Result, DataError> + where + M: DataMarker + 'static, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + for<'a> YokeTraitHack<>::Output>: Clone, + { + Ok(DataResponse { + metadata: self.metadata.clone(), + payload: self + .payload + .as_ref() + .map(|p| p.downcast_cloned()) + .transpose()?, + }) + } +} + +impl DataResponse +where + M: DataMarker + 'static, + M::Yokeable: MaybeSendSync, +{ + /// Moves the inner DataPayload to the heap (requiring an allocation) and returns it as an + /// erased `AnyResponse`. + pub fn wrap_into_any_response(self) -> AnyResponse { + AnyResponse { + metadata: self.metadata, + payload: self.payload.map(|p| p.wrap_into_any_payload()), + } + } } /// An object-safe data provider that returns data structs cast to `dyn Any` trait objects.