Skip to content

Use LikelySubtagsForLanguageV1 for fallback #5338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/experimental/src/personnames/formatter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl PersonNamesFormatter {
P: ?Sized
+ DataProvider<icu_properties::provider::ScriptWithExtensionsPropertyV1Marker>
+ DataProvider<icu_properties::provider::ScriptValueToShortNameV1Marker>
+ DataProvider<icu_locale::provider::LocaleFallbackLikelySubtagsV1Marker>
+ DataProvider<icu_locale::provider::LikelySubtagsForLanguageV1Marker>
+ DataProvider<icu_locale::provider::LocaleFallbackParentsV1Marker>,
{
let swe = icu_properties::script::load_script_with_extensions_unstable(provider)?;
Expand Down
2 changes: 1 addition & 1 deletion components/experimental/tests/personnames/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const _: () = {

make_provider!(TestingProvider);
impl_person_names_format_v1_marker!(TestingProvider);
icu_locale_data::impl_locale_fallback_likely_subtags_v1_marker!(TestingProvider);
icu_locale_data::impl_likely_subtags_for_language_v1_marker!(TestingProvider);
icu_locale_data::impl_locale_fallback_parents_v1_marker!(TestingProvider);
icu_properties_data::impl_script_value_to_short_name_v1_marker!(TestingProvider);
icu_properties_data::impl_script_with_extensions_property_v1_marker!(TestingProvider);
Expand Down
59 changes: 28 additions & 31 deletions components/locale/src/fallback/algorithms.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,43 +16,40 @@ impl<'a> LocaleFallbackerWithConfig<'a> {
if let Some(script) = locale.script {
locale.region = self
.likely_subtags
.ls2r
.get_2d(
&language.into_tinystr().to_unvalidated(),
&script.into_tinystr().to_unvalidated(),
)
.language_script
.get(&(
language.into_tinystr().to_unvalidated(),
script.into_tinystr().to_unvalidated(),
))
.copied();
}
// 1b. If that fails, try language only
if locale.region.is_none() {
locale.region = self
.likely_subtags
.l2r
.get(&language.into_tinystr().to_unvalidated())
.copied();
.language
.get_copied(&language.into_tinystr().to_unvalidated())
.map(|(_s, r)| r);
}
}
// 2. Remove the script if it is implied by the other subtags
if let Some(script) = locale.script {
let default_script = self
.likely_subtags
.l2s
.get_copied(&language.into_tinystr().to_unvalidated())
.unwrap_or(DEFAULT_SCRIPT);
if let Some(region) = locale.region {
if script
== self
.likely_subtags
.lr2s
.get_copied_2d(
&language.into_tinystr().to_unvalidated(),
&region.into_tinystr().to_unvalidated(),
)
.unwrap_or(default_script)
{
locale.script = None;
}
} else if script == default_script {
let default_script = locale
.region
.and_then(|region| {
self.likely_subtags.language_region.get_copied(&(
language.into_tinystr().to_unvalidated(),
region.into_tinystr().to_unvalidated(),
))
})
.or_else(|| {
self.likely_subtags
.language
.get_copied(&language.into_tinystr().to_unvalidated())
.map(|(s, _r)| s)
});

if Some(script) == default_script {
locale.script = None;
}
}
Expand Down Expand Up @@ -104,10 +101,10 @@ impl<'a> LocaleFallbackIteratorInner<'a> {
if locale.script.is_none() {
if let Some(region) = locale.region {
let language = locale.language;
if let Some(script) = self.likely_subtags.lr2s.get_copied_2d(
&language.into_tinystr().to_unvalidated(),
&region.into_tinystr().to_unvalidated(),
) {
if let Some(script) = self.likely_subtags.language_region.get_copied(&(
language.into_tinystr().to_unvalidated(),
region.into_tinystr().to_unvalidated(),
)) {
locale.script = Some(script);
self.restore_subdivision_variants(locale);
return;
Expand Down
25 changes: 17 additions & 8 deletions components/locale/src/fallback/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,29 +58,29 @@ mod algorithms;
#[doc(hidden)] // canonical location in super
#[derive(Debug, Clone, PartialEq)]
pub struct LocaleFallbacker {
likely_subtags: DataPayload<LocaleFallbackLikelySubtagsV1Marker>,
likely_subtags: DataPayload<LikelySubtagsForLanguageV1Marker>,
parents: DataPayload<LocaleFallbackParentsV1Marker>,
}

/// Borrowed version of [`LocaleFallbacker`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LocaleFallbackerBorrowed<'a> {
likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
likely_subtags: &'a LikelySubtagsForLanguageV1<'a>,
parents: &'a LocaleFallbackParentsV1<'a>,
}

/// A [`LocaleFallbackerBorrowed`] with an associated [`LocaleFallbackConfig`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LocaleFallbackerWithConfig<'a> {
likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
likely_subtags: &'a LikelySubtagsForLanguageV1<'a>,
parents: &'a LocaleFallbackParentsV1<'a>,
config: LocaleFallbackConfig,
}

/// Inner iteration type. Does not own the item under fallback.
#[derive(Debug)]
struct LocaleFallbackIteratorInner<'a> {
likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
likely_subtags: &'a LikelySubtagsForLanguageV1<'a>,
parents: &'a LocaleFallbackParentsV1<'a>,
config: LocaleFallbackConfig,
backup_subdivision: Option<Subtag>,
Expand Down Expand Up @@ -108,8 +108,7 @@ impl LocaleFallbacker {
#[allow(clippy::new_ret_no_self)] // keeping constructors together
pub const fn new<'a>() -> LocaleFallbackerBorrowed<'a> {
let tickstatic = LocaleFallbackerBorrowed {
likely_subtags:
crate::provider::Baked::SINGLETON_LOCALE_FALLBACK_LIKELY_SUBTAGS_V1_MARKER,
likely_subtags: crate::provider::Baked::SINGLETON_LIKELY_SUBTAGS_FOR_LANGUAGE_V1_MARKER,
parents: crate::provider::Baked::SINGLETON_LOCALE_FALLBACK_PARENTS_V1_MARKER,
};
// Safety: we're transmuting down from LocaleFallbackerBorrowed<'static> to LocaleFallbackerBorrowed<'a>
Expand All @@ -132,7 +131,7 @@ impl LocaleFallbacker {
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<LocaleFallbackLikelySubtagsV1Marker>
P: DataProvider<LikelySubtagsForLanguageV1Marker>
+ DataProvider<LocaleFallbackParentsV1Marker>
+ ?Sized,
{
Expand All @@ -148,7 +147,17 @@ impl LocaleFallbacker {
/// surprising behavior, especially in multi-script languages.
pub fn new_without_data() -> Self {
LocaleFallbacker {
likely_subtags: DataPayload::from_owned(Default::default()),
likely_subtags: DataPayload::from_owned(LikelySubtagsForLanguageV1 {
language: Default::default(),
language_region: Default::default(),
language_script: Default::default(),
// Unused
und: (
Default::default(),
crate::subtags::script!("Zzzz"),
crate::subtags::region!("ZZ"),
),
}),
parents: DataPayload::from_owned(Default::default()),
}
}
Expand Down
51 changes: 1 addition & 50 deletions components/locale/src/provider/fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,10 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use super::*;
use icu_locale_core::subtags::{region, script, Language, Region, Script};
use icu_locale_core::subtags::{Language, Region, Script};
use icu_provider::prelude::*;
use zerovec::ule::UnvalidatedStr;
use zerovec::ZeroMap;
use zerovec::ZeroMap2d;

/// Locale fallback rules derived from likely subtags data.
#[icu_provider::data_struct(marker(
LocaleFallbackLikelySubtagsV1Marker,
"fallback/likelysubtags@1",
singleton
))]
#[derive(Default, Clone, PartialEq, Debug)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_locale::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[yoke(prove_covariance_manually)]
pub struct LocaleFallbackLikelySubtagsV1<'data> {
/// Map from language to the default script in that language. Languages whose default script
/// is `Latn` are not included in the map for data size savings.
///
/// Example: "zh" defaults to "Hans", which is in this map.
#[cfg_attr(feature = "serde", serde(borrow))]
pub l2s: ZeroMap<'data, UnvalidatedLanguage, Script>,
/// Map from language-region pairs to a script. Only populated if the script is different
/// from the one in `l2s` for that language.
///
/// Example: "zh-TW" defaults to "Hant", which is in this map.
#[cfg_attr(feature = "serde", serde(borrow))]
pub lr2s: ZeroMap2d<'data, UnvalidatedLanguage, UnvalidatedRegion, Script>,
/// Map from language to the default region in that language. Languages whose default region
/// is `ZZ` are not included in the map for data size savings.
///
/// Example: "zh" defaults to "CN".
#[cfg_attr(feature = "serde", serde(borrow))]
pub l2r: ZeroMap<'data, UnvalidatedLanguage, Region>,
/// Map from language-script pairs to a region. Only populated if the region is different
/// from the one in `l2r` for that language.
///
/// Example: "zh-Hant" defaults to "TW".
#[cfg_attr(feature = "serde", serde(borrow))]
pub ls2r: ZeroMap2d<'data, UnvalidatedLanguage, UnvalidatedScript, Region>,
}

/// `Latn` is the most common script, so it is defaulted for data size savings.
pub const DEFAULT_SCRIPT: Script = script!("Latn");

/// `ZZ` is the most common region, so it is defaulted for data size savings.
pub const DEFAULT_REGION: Region = region!("ZZ");

/// Locale fallback rules derived from CLDR parent locales data.
#[icu_provider::data_struct(marker(
Expand Down
2 changes: 0 additions & 2 deletions components/locale/src/provider/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ const _: () = {
pub use crate as locale;
}
make_provider!(Baked);
impl_locale_fallback_likely_subtags_v1_marker!(Baked);
impl_locale_fallback_parents_v1_marker!(Baked);
impl_aliases_v2_marker!(Baked);
impl_likely_subtags_extended_v1_marker!(Baked);
Expand All @@ -63,7 +62,6 @@ pub const MARKERS: &[DataMarkerInfo] = &[
LikelySubtagsExtendedV1Marker::INFO,
LikelySubtagsForLanguageV1Marker::INFO,
LikelySubtagsForScriptRegionV1Marker::INFO,
LocaleFallbackLikelySubtagsV1Marker::INFO,
LocaleFallbackParentsV1Marker::INFO,
ScriptDirectionV1Marker::INFO,
];
Expand Down

This file was deleted.

3 changes: 0 additions & 3 deletions provider/data/locale/data/mod.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion provider/data/locale/fingerprints.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
fallback/likelysubtags@1, <singleton>, 2228B, a6ec0551218ff716
fallback/parents@1, <singleton>, 2597B, 8f3b5fb7bff27d97
locid_transform/aliases@2, <singleton>, 8623B, 2c1368fc48eb6158
locid_transform/likelysubtags_ext@1, <singleton>, 75788B, 681a300516da7f39
Expand Down
Loading