Skip to content

Commit da0b424

Browse files
authored
Adding LruDataCache and overlay examples (#2914)
1 parent dda5753 commit da0b424

File tree

4 files changed

+237
-5
lines changed

4 files changed

+237
-5
lines changed

Cargo.lock

Lines changed: 22 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/tutorials/data_provider.md

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,169 @@ impl AdditiveIdentity {
6161
}
6262
}
6363
```
64+
65+
## Caching Data Provider
66+
67+
ICU4X has no internal caches because there is no one-size-fits-all solution. It is easy for clients to implement their own cache for ICU4X. Although this is not generally required or recommended, it may be beneficial when latency is of utmost importance and a less-efficient data provider (for example, one backed by JSON) is being used.
68+
69+
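The following example illustrates an LRU cache on top of a BufferProvider. It checks for a cache hit before calling the inner provider; on a miss, it loads from the inner provider and saves the deserialized data payload as a type-erased object so that later requests for the same key and locale can reuse it.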
70+
71+
```rust
72+
use icu_provider::hello_world::HelloWorldFormatter;
73+
use icu_provider::prelude::*;
74+
use icu::locid::locale;
75+
use lru::LruCache;
76+
use std::borrow::{Borrow, Cow};
77+
use std::convert::TryInto;
78+
use std::sync::Mutex;
79+
use yoke::trait_hack::YokeTraitHack;
80+
use yoke::Yokeable;
81+
use zerofrom::ZeroFrom;
82+
83+
#[derive(Debug, PartialEq, Eq, Hash)]
84+
struct CacheKeyWrap(CacheKey<'static>);
85+
86+
#[derive(Debug, PartialEq, Eq, Hash)]
87+
struct CacheKey<'a>(DataKey, Cow<'a, DataLocale>);
88+
89+
pub struct LruDataCache<P> {
90+
cache: Mutex<LruCache<CacheKeyWrap, AnyResponse>>,
91+
provider: P,
92+
}
93+
94+
// This impl enables a borrowed DataLocale to be used during cache retrieval.
95+
impl<'a> Borrow<CacheKey<'a>> for lru::KeyRef<CacheKeyWrap> {
96+
fn borrow(&self) -> &CacheKey<'a> {
97+
&Borrow::<CacheKeyWrap>::borrow(self).0
98+
}
99+
}
100+
101+
impl<M, P> DataProvider<M> for LruDataCache<P>
102+
where
103+
M: KeyedDataMarker + 'static,
104+
M::Yokeable: ZeroFrom<'static, M::Yokeable>,
105+
M::Yokeable: icu_provider::MaybeSendSync,
106+
for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone,
107+
P: DataProvider<M>,
108+
{
109+
fn load(&self, req: DataRequest) -> Result<DataResponse<M>, DataError> {
110+
{
111+
// First lock: cache retrieval
112+
let mut cache = self.cache.lock().unwrap();
113+
let borrowed_cache_key = CacheKey(M::KEY, Cow::Borrowed(req.locale));
114+
if let Some(any_res) = cache.get(&borrowed_cache_key) {
115+
// Note: Cloning a DataPayload is usually cheap, and it is necessary in order to
116+
// convert the short-lived cache object into one we can return.
117+
return any_res.downcast_cloned();
118+
}
119+
}
120+
// The lock was released at the end of the block above, so the inner provider is invoked without holding it
121+
let response = self.provider.load(req)?;
122+
let owned_cache_key = CacheKeyWrap(CacheKey(M::KEY, Cow::Owned(req.locale.clone())));
123+
// Second lock: cache storage
124+
self.cache.lock()
125+
.unwrap()
126+
.get_or_insert(owned_cache_key, || response.wrap_into_any_response())
127+
.downcast_cloned()
128+
}
129+
}
130+
131+
// Usage example:
132+
let provider = icu_testdata::buffer();
133+
let lru_capacity = 100usize.try_into().unwrap();
134+
let provider = LruDataCache {
135+
cache: Mutex::new(LruCache::new(lru_capacity)),
136+
provider: provider.as_deserializing(),
137+
};
138+
139+
// The cache starts empty:
140+
assert_eq!(provider.cache.lock().unwrap().len(), 0);
141+
142+
assert_eq!(
143+
"こんにちは世界",
144+
// Note: It is necessary to use `try_new_unstable` with LruDataCache, because the cache implements `DataProvider<M>`.
145+
HelloWorldFormatter::try_new_unstable(
146+
&provider,
147+
&locale!("ja").into()
148+
)
149+
.unwrap()
150+
.format_to_string()
151+
);
152+
153+
// One item in the cache:
154+
assert_eq!(provider.cache.lock().unwrap().len(), 1);
155+
156+
assert_eq!(
157+
"ওহে বিশ্ব",
158+
HelloWorldFormatter::try_new_unstable(
159+
&provider,
160+
&locale!("bn").into()
161+
)
162+
.unwrap()
163+
.format_to_string()
164+
);
165+
166+
// Two items in the cache:
167+
assert_eq!(provider.cache.lock().unwrap().len(), 2);
168+
169+
assert_eq!(
170+
"こんにちは世界",
171+
HelloWorldFormatter::try_new_unstable(
172+
&provider,
173+
&locale!("ja").into()
174+
)
175+
.unwrap()
176+
.format_to_string()
177+
);
178+
179+
// Still only two items in the cache, since we re-requested "ja" data:
180+
assert_eq!(provider.cache.lock().unwrap().len(), 2);
181+
```
182+
183+
## Overwriting Specific Data Items
184+
185+
ICU4X's explicit data pipeline allows for specific data entries to be overwritten in order to customize the output or comply with policy.
186+
187+
The following example illustrates how to overwrite the grouping separator used in decimal formatting for a region.
188+
189+
```rust
190+
use icu::decimal::FixedDecimalFormatter;
191+
use icu_provider::prelude::*;
192+
use icu::locid::locale;
193+
use icu::locid::subtags_region as region;
194+
use std::borrow::Cow;
195+
use tinystr::tinystr;
196+
197+
pub struct CustomDecimalSymbolsProvider<P>(P);
198+
199+
impl<P> AnyProvider for CustomDecimalSymbolsProvider<P>
200+
where
201+
P: AnyProvider
202+
{
203+
fn load_any(&self, key: DataKey, req: DataRequest) -> Result<AnyResponse, DataError> {
204+
use icu::decimal::provider::DecimalSymbolsV1Marker;
205+
let mut any_res = self.0.load_any(key, req)?;
206+
if key == DecimalSymbolsV1Marker::KEY && req.locale.region() == Some(region!("CH")) {
207+
let mut res: DataResponse<DecimalSymbolsV1Marker> = any_res.downcast()?;
208+
if let Some(payload) = &mut res.payload.as_mut() {
209+
payload.with_mut(|data| {
210+
// Change the grouping separator for all Swiss locales to '🐮'
211+
data.grouping_separator = Cow::Borrowed("🐮");
212+
});
213+
}
214+
any_res = res.wrap_into_any_response();
215+
}
216+
Ok(any_res)
217+
}
218+
}
219+
220+
let provider = CustomDecimalSymbolsProvider(icu_testdata::any());
221+
let formatter = FixedDecimalFormatter::try_new_with_any_provider(
222+
&provider,
223+
&locale!("de-CH").into(),
224+
Default::default(),
225+
)
226+
.unwrap();
227+
228+
assert_eq!(formatter.format_to_string(&100007i64.into()), "100🐮007");
229+
```

experimental/tutorials/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ icu = { version = "1.0.0", path = "../../components/icu", default-features = fal
3535
icu_provider = { version = "1.0.0", path = "../../provider/core", default-features = false }
3636
icu_provider_fs = { version = "1.0.0", path = "../../provider/fs" }
3737
icu_provider_blob = { version = "1.0.0", path = "../../provider/blob" }
38-
icu_testdata = { version = "1.0.0", path = "../../provider/testdata" }
38+
icu_testdata = { version = "1.0.0", path = "../../provider/testdata", features = ["buffer"] }
3939
zerofrom = { version = "0.1.0", path = "../../utils/zerofrom" }
4040
serde = { version = "1.0", features = ["derive", "alloc", "std"] }
4141
icu_datagen = { version = "1.0.0", path = "../../provider/datagen" }
@@ -47,3 +47,4 @@ databake = { version = "0.1.0", path = "../../utils/databake", features = ["deri
4747
serde-aux = "2.1.1"
4848
itertools = "0.10"
4949
embed-doc-image = "0.1"
50+
lru = "0.8.1"

provider/core/src/any.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,19 @@ impl AnyPayload {
115115
}
116116
}
117117

118+
/// Clones and then transforms a type-erased `AnyPayload` into a concrete `DataPayload<M>`.
119+
pub fn downcast_cloned<M>(&self) -> Result<DataPayload<M>, DataError>
120+
where
121+
M: DataMarker + 'static,
122+
// For the StructRef case:
123+
M::Yokeable: ZeroFrom<'static, M::Yokeable>,
124+
// For the PayloadRc case:
125+
M::Yokeable: MaybeSendSync,
126+
for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone,
127+
{
128+
self.clone().downcast()
129+
}
130+
118131
/// Creates an `AnyPayload` from a static reference to a data struct.
119132
///
120133
/// # Examples
@@ -230,7 +243,7 @@ impl From<AnyResponse> for DataResponse<AnyMarker> {
230243
}
231244

232245
impl AnyResponse {
233-
/// Transforms a type-erased `DataResponse<AnyMarker>` into a concrete `DataResponse<M>`.
246+
/// Transforms a type-erased `AnyResponse` into a concrete `DataResponse<M>`.
234247
#[inline]
235248
pub fn downcast<M>(self) -> Result<DataResponse<M>, DataError>
236249
where
@@ -244,6 +257,39 @@ impl AnyResponse {
244257
payload: self.payload.map(|p| p.downcast()).transpose()?,
245258
})
246259
}
260+
261+
/// Clones and then transforms a type-erased `AnyResponse` into a concrete `DataResponse<M>`.
262+
pub fn downcast_cloned<M>(&self) -> Result<DataResponse<M>, DataError>
263+
where
264+
M: DataMarker + 'static,
265+
M::Yokeable: ZeroFrom<'static, M::Yokeable>,
266+
M::Yokeable: MaybeSendSync,
267+
for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone,
268+
{
269+
Ok(DataResponse {
270+
metadata: self.metadata.clone(),
271+
payload: self
272+
.payload
273+
.as_ref()
274+
.map(|p| p.downcast_cloned())
275+
.transpose()?,
276+
})
277+
}
278+
}
279+
280+
impl<M> DataResponse<M>
281+
where
282+
M: DataMarker + 'static,
283+
M::Yokeable: MaybeSendSync,
284+
{
285+
/// Moves the inner DataPayload to the heap (requiring an allocation) and returns it as an
286+
/// erased `AnyResponse`.
287+
pub fn wrap_into_any_response(self) -> AnyResponse {
288+
AnyResponse {
289+
metadata: self.metadata,
290+
payload: self.payload.map(|p| p.wrap_into_any_payload()),
291+
}
292+
}
247293
}
248294

249295
/// An object-safe data provider that returns data structs cast to `dyn Any` trait objects.

0 commit comments

Comments
 (0)