diff --git a/components/experimental/src/personnames/formatter.rs b/components/experimental/src/personnames/formatter.rs index 2899d153f31..b9175ef22cc 100644 --- a/components/experimental/src/personnames/formatter.rs +++ b/components/experimental/src/personnames/formatter.rs @@ -46,7 +46,7 @@ impl PersonNamesFormatter { P: ?Sized + DataProvider + DataProvider - + DataProvider + + DataProvider + DataProvider, { let swe = icu_properties::script::load_script_with_extensions_unstable(provider)?; diff --git a/components/experimental/tests/personnames/tests.rs b/components/experimental/tests/personnames/tests.rs index 446b742fd28..17cfffa1822 100644 --- a/components/experimental/tests/personnames/tests.rs +++ b/components/experimental/tests/personnames/tests.rs @@ -24,7 +24,7 @@ const _: () = { make_provider!(TestingProvider); impl_person_names_format_v1_marker!(TestingProvider); - icu_locale_data::impl_locale_fallback_likely_subtags_v1_marker!(TestingProvider); + icu_locale_data::impl_likely_subtags_for_language_v1_marker!(TestingProvider); icu_locale_data::impl_locale_fallback_parents_v1_marker!(TestingProvider); icu_properties_data::impl_script_value_to_short_name_v1_marker!(TestingProvider); icu_properties_data::impl_script_with_extensions_property_v1_marker!(TestingProvider); diff --git a/components/locale/src/fallback/algorithms.rs b/components/locale/src/fallback/algorithms.rs index b4539775dce..3b41f51a6ff 100644 --- a/components/locale/src/fallback/algorithms.rs +++ b/components/locale/src/fallback/algorithms.rs @@ -16,43 +16,40 @@ impl<'a> LocaleFallbackerWithConfig<'a> { if let Some(script) = locale.script { locale.region = self .likely_subtags - .ls2r - .get_2d( - &language.into_tinystr().to_unvalidated(), - &script.into_tinystr().to_unvalidated(), - ) + .language_script + .get(&( + language.into_tinystr().to_unvalidated(), + script.into_tinystr().to_unvalidated(), + )) .copied(); } // 1b. If that fails, try language only if locale.region.is_none() { locale.region = self .likely_subtags - .l2r - .get(&language.into_tinystr().to_unvalidated()) - .copied(); + .language + .get_copied(&language.into_tinystr().to_unvalidated()) + .map(|(_s, r)| r); } } // 2. Remove the script if it is implied by the other subtags if let Some(script) = locale.script { - let default_script = self - .likely_subtags - .l2s - .get_copied(&language.into_tinystr().to_unvalidated()) - .unwrap_or(DEFAULT_SCRIPT); - if let Some(region) = locale.region { - if script - == self - .likely_subtags - .lr2s - .get_copied_2d( - &language.into_tinystr().to_unvalidated(), - ®ion.into_tinystr().to_unvalidated(), - ) - .unwrap_or(default_script) - { - locale.script = None; - } - } else if script == default_script { + let default_script = locale + .region + .and_then(|region| { + self.likely_subtags.language_region.get_copied(&( + language.into_tinystr().to_unvalidated(), + region.into_tinystr().to_unvalidated(), + )) + }) + .or_else(|| { + self.likely_subtags + .language + .get_copied(&language.into_tinystr().to_unvalidated()) + .map(|(s, _r)| s) + }); + + if Some(script) == default_script { locale.script = None; } } @@ -104,10 +101,10 @@ impl<'a> LocaleFallbackIteratorInner<'a> { if locale.script.is_none() { if let Some(region) = locale.region { let language = locale.language; - if let Some(script) = self.likely_subtags.lr2s.get_copied_2d( - &language.into_tinystr().to_unvalidated(), - ®ion.into_tinystr().to_unvalidated(), - ) { + if let Some(script) = self.likely_subtags.language_region.get_copied(&( + language.into_tinystr().to_unvalidated(), + region.into_tinystr().to_unvalidated(), + )) { locale.script = Some(script); self.restore_subdivision_variants(locale); return; diff --git a/components/locale/src/fallback/mod.rs b/components/locale/src/fallback/mod.rs index f82805d0f8a..cb7c9dca77a 100644 --- a/components/locale/src/fallback/mod.rs +++ b/components/locale/src/fallback/mod.rs @@ -58,21 +58,21 @@ mod algorithms; #[doc(hidden)] // canonical location in super #[derive(Debug, Clone, PartialEq)] pub struct LocaleFallbacker { - likely_subtags: DataPayload, + likely_subtags: DataPayload, parents: DataPayload, } /// Borrowed version of [`LocaleFallbacker`]. #[derive(Debug, Clone, Copy, PartialEq)] pub struct LocaleFallbackerBorrowed<'a> { - likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, + likely_subtags: &'a LikelySubtagsForLanguageV1<'a>, parents: &'a LocaleFallbackParentsV1<'a>, } /// A [`LocaleFallbackerBorrowed`] with an associated [`LocaleFallbackConfig`]. #[derive(Debug, Clone, Copy, PartialEq)] pub struct LocaleFallbackerWithConfig<'a> { - likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, + likely_subtags: &'a LikelySubtagsForLanguageV1<'a>, parents: &'a LocaleFallbackParentsV1<'a>, config: LocaleFallbackConfig, } @@ -80,7 +80,7 @@ pub struct LocaleFallbackerWithConfig<'a> { /// Inner iteration type. Does not own the item under fallback. #[derive(Debug)] struct LocaleFallbackIteratorInner<'a> { - likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, + likely_subtags: &'a LikelySubtagsForLanguageV1<'a>, parents: &'a LocaleFallbackParentsV1<'a>, config: LocaleFallbackConfig, backup_subdivision: Option, @@ -108,8 +108,7 @@ impl LocaleFallbacker { #[allow(clippy::new_ret_no_self)] // keeping constructors together pub const fn new<'a>() -> LocaleFallbackerBorrowed<'a> { let tickstatic = LocaleFallbackerBorrowed { - likely_subtags: - crate::provider::Baked::SINGLETON_LOCALE_FALLBACK_LIKELY_SUBTAGS_V1_MARKER, + likely_subtags: crate::provider::Baked::SINGLETON_LIKELY_SUBTAGS_FOR_LANGUAGE_V1_MARKER, parents: crate::provider::Baked::SINGLETON_LOCALE_FALLBACK_PARENTS_V1_MARKER, }; // Safety: we're transmuting down from LocaleFallbackerBorrowed<'static> to LocaleFallbackerBorrowed<'a> @@ -132,7 +131,7 @@ impl LocaleFallbacker { #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] pub fn try_new_unstable

(provider: &P) -> Result where - P: DataProvider + P: DataProvider + DataProvider + ?Sized, { @@ -148,7 +147,17 @@ impl LocaleFallbacker { /// surprising behavior, especially in multi-script languages. pub fn new_without_data() -> Self { LocaleFallbacker { - likely_subtags: DataPayload::from_owned(Default::default()), + likely_subtags: DataPayload::from_owned(LikelySubtagsForLanguageV1 { + language: Default::default(), + language_region: Default::default(), + language_script: Default::default(), + // Unused + und: ( + Default::default(), + crate::subtags::script!("Zzzz"), + crate::subtags::region!("ZZ"), + ), + }), parents: DataPayload::from_owned(Default::default()), } } diff --git a/components/locale/src/provider/fallback.rs b/components/locale/src/provider/fallback.rs index 4fc50473779..243383888c9 100644 --- a/components/locale/src/provider/fallback.rs +++ b/components/locale/src/provider/fallback.rs @@ -2,59 +2,10 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use super::*; -use icu_locale_core::subtags::{region, script, Language, Region, Script}; +use icu_locale_core::subtags::{Language, Region, Script}; use icu_provider::prelude::*; use zerovec::ule::UnvalidatedStr; use zerovec::ZeroMap; -use zerovec::ZeroMap2d; - -/// Locale fallback rules derived from likely subtags data. -#[icu_provider::data_struct(marker( - LocaleFallbackLikelySubtagsV1Marker, - "fallback/likelysubtags@1", - singleton -))] -#[derive(Default, Clone, PartialEq, Debug)] -#[cfg_attr( - feature = "datagen", - derive(serde::Serialize, databake::Bake), - databake(path = icu_locale::provider), -)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize))] -#[yoke(prove_covariance_manually)] -pub struct LocaleFallbackLikelySubtagsV1<'data> { - /// Map from language to the default script in that language. Languages whose default script - /// is `Latn` are not included in the map for data size savings. - /// - /// Example: "zh" defaults to "Hans", which is in this map. - #[cfg_attr(feature = "serde", serde(borrow))] - pub l2s: ZeroMap<'data, UnvalidatedLanguage, Script>, - /// Map from language-region pairs to a script. Only populated if the script is different - /// from the one in `l2s` for that language. - /// - /// Example: "zh-TW" defaults to "Hant", which is in this map. - #[cfg_attr(feature = "serde", serde(borrow))] - pub lr2s: ZeroMap2d<'data, UnvalidatedLanguage, UnvalidatedRegion, Script>, - /// Map from language to the default region in that language. Languages whose default region - /// is `ZZ` are not included in the map for data size savings. - /// - /// Example: "zh" defaults to "CN". - #[cfg_attr(feature = "serde", serde(borrow))] - pub l2r: ZeroMap<'data, UnvalidatedLanguage, Region>, - /// Map from language-script pairs to a region. Only populated if the region is different - /// from the one in `l2r` for that language. - /// - /// Example: "zh-Hant" defaults to "TW". - #[cfg_attr(feature = "serde", serde(borrow))] - pub ls2r: ZeroMap2d<'data, UnvalidatedLanguage, UnvalidatedScript, Region>, -} - -/// `Latn` is the most common script, so it is defaulted for data size savings. -pub const DEFAULT_SCRIPT: Script = script!("Latn"); - -/// `ZZ` is the most common region, so it is defaulted for data size savings. -pub const DEFAULT_REGION: Region = region!("ZZ"); /// Locale fallback rules derived from CLDR parent locales data. #[icu_provider::data_struct(marker( diff --git a/components/locale/src/provider/mod.rs b/components/locale/src/provider/mod.rs index 9b07a27da6e..d6eb0e6ae35 100644 --- a/components/locale/src/provider/mod.rs +++ b/components/locale/src/provider/mod.rs @@ -44,7 +44,6 @@ const _: () = { pub use crate as locale; } make_provider!(Baked); - impl_locale_fallback_likely_subtags_v1_marker!(Baked); impl_locale_fallback_parents_v1_marker!(Baked); impl_aliases_v2_marker!(Baked); impl_likely_subtags_extended_v1_marker!(Baked); @@ -63,7 +62,6 @@ pub const MARKERS: &[DataMarkerInfo] = &[ LikelySubtagsExtendedV1Marker::INFO, LikelySubtagsForLanguageV1Marker::INFO, LikelySubtagsForScriptRegionV1Marker::INFO, - LocaleFallbackLikelySubtagsV1Marker::INFO, LocaleFallbackParentsV1Marker::INFO, ScriptDirectionV1Marker::INFO, ]; diff --git a/provider/data/locale/data/locale_fallback_likely_subtags_v1_marker.rs.data b/provider/data/locale/data/locale_fallback_likely_subtags_v1_marker.rs.data deleted file mode 100644 index 860a96d9768..00000000000 --- a/provider/data/locale/data/locale_fallback_likely_subtags_v1_marker.rs.data +++ /dev/null @@ -1,60 +0,0 @@ -// @generated -/// Implement `DataProvider` on the given struct using the data -/// hardcoded in this file. This allows the struct to be used with -/// `icu`'s `_unstable` constructors. -/// -/// Using this implementation will embed the following data in the binary's data segment: -/// * 2228B[^1] for the singleton data struct -/// -/// [^1]: these numbers can be smaller in practice due to linker deduplication -#[doc(hidden)] -#[macro_export] -macro_rules! __impl_locale_fallback_likely_subtags_v1_marker { - ($ provider : ty) => { - #[clippy::msrv = "1.70"] - const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO; - #[clippy::msrv = "1.70"] - impl $provider { - #[doc(hidden)] - pub const SINGLETON_LOCALE_FALLBACK_LIKELY_SUBTAGS_V1_MARKER: &'static ::DataStruct = &icu::locale::provider::LocaleFallbackLikelySubtagsV1 { - l2s: unsafe { - #[allow(unused_unsafe)] - zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"am\0ar\0as\0be\0bg\0bgcbhobn\0brxchrcswcv\0doiel\0fa\0gu\0he\0hi\0hy\0ja\0ka\0kk\0km\0kn\0ko\0kokks\0ky\0lo\0maimk\0ml\0mn\0mnimr\0my\0ne\0nqoor\0pa\0ps\0rajru\0sa\0sahsatsd\0si\0sr\0syrta\0te\0tg\0th\0ti\0tt\0ug\0uk\0ur\0xnryuezh\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"EthiArabBengCyrlCyrlDevaDevaBengDevaCherCansCyrlDevaGrekArabGujrHebrDevaArmnJpanGeorCyrlKhmrKndaKoreDevaArabCyrlLaooDevaCyrlMlymCyrlBengDevaMymrDevaNkooOryaGuruArabDevaCyrlDevaCyrlOlckArabSinhCyrlSyrcTamlTeluCyrlThaiEthiCyrlArabCyrlArabDevaHantHans") }) - }, - lr2s: unsafe { - #[allow(unused_unsafe)] - zerovec::ZeroMap2d::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"az\0ha\0kk\0ku\0ky\0mn\0ms\0pa\0sd\0sr\0tg\0ug\0uz\0yuezh\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\x03\0\0\0\x05\0\0\0\t\0\0\0\n\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x14\0\0\0\x15\0\0\0\x17\0\0\0\x19\0\0\0\x1A\0\0\0)\0\0\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"IQ\0IR\0RU\0CM\0SD\0AF\0CN\0IR\0MN\0LB\0CN\0TR\0CN\0CC\0PK\0IN\0ME\0RO\0RU\0TR\0PK\0KZ\0MN\0AF\0CN\0CN\0AU\0BN\0GB\0GF\0HK\0ID\0MO\0PA\0PF\0PH\0SR\0TH\0TW\0US\0VN\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ArabArabCyrlArabArabArabArabArabArabArabArabLatnMongArabArabDevaLatnLatnLatnLatnArabCyrlCyrlArabCyrlHansHantHantHantHantHantHantHantHantHantHantHantHantHantHantHant") }) - }, - l2r: unsafe { - #[allow(unused_unsafe)] - zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"af\0am\0ar\0as\0astaz\0be\0bg\0bgcbhoblobn\0br\0brxbs\0ca\0cebchrcs\0cswcv\0cy\0da\0de\0doidsbel\0en\0eo\0es\0et\0eu\0fa\0ff\0fi\0filfo\0fr\0fy\0ga\0gd\0gl\0gu\0ha\0he\0hi\0hr\0hsbhu\0hy\0ia\0id\0ie\0ig\0is\0it\0ja\0jv\0ka\0keakgpkk\0km\0kn\0ko\0kokks\0ku\0kxvky\0lb\0lijlmolo\0lt\0lv\0maimi\0mk\0ml\0mn\0mnimr\0ms\0mt\0my\0ndsne\0nl\0nn\0no\0nqooc\0or\0pa\0pcmpl\0prgps\0pt\0qu\0rajrm\0ro\0ru\0sa\0sahsatsc\0sd\0si\0sk\0sl\0so\0sq\0sr\0su\0sv\0sw\0syrszlta\0te\0tg\0th\0ti\0tk\0to\0tr\0tt\0ug\0uk\0ur\0uz\0vecvi\0vmwwo\0xh\0xnryo\0yrlyueza\0zh\0zu\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ZA\0ET\0EG\0IN\0ES\0AZ\0BY\0BG\0IN\0IN\0BJ\0BD\0FR\0IN\0BA\0ES\0PH\0US\0CZ\0CA\0RU\0GB\0DK\0DE\0IN\0DE\0GR\0US\x00001ES\0EE\0ES\0IR\0SN\0FI\0PH\0FO\0FR\0NL\0IE\0GB\0ES\0IN\0NG\0IL\0IN\0HR\0DE\0HU\0AM\x00001ID\0EE\0NG\0IS\0IT\0JP\0ID\0GE\0CV\0BR\0KZ\0KH\0IN\0KR\0IN\0IN\0TR\0IN\0KG\0LU\0IT\0IT\0LA\0LT\0LV\0IN\0NZ\0MK\0IN\0MN\0IN\0IN\0MY\0MT\0MM\0DE\0NP\0NL\0NO\0NO\0GN\0FR\0IN\0IN\0NG\0PL\0PL\0AF\0BR\0PE\0IN\0CH\0RO\0RU\0IN\0RU\0IN\0IT\0PK\0LK\0SK\0SI\0SO\0AL\0RS\0ID\0SE\0TZ\0IQ\0PL\0IN\0IN\0TJ\0TH\0ET\0TM\0TO\0TR\0RU\0CN\0UA\0PK\0UZ\0IT\0VN\0MZ\0SN\0ZA\0IN\0NG\0BR\0HK\0CN\0CN\0ZA\0") }) - }, - ls2r: unsafe { - #[allow(unused_unsafe)] - zerovec::ZeroMap2d::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"az\0en\0ff\0kk\0ku\0ky\0mn\0pa\0sd\0tg\0ug\0uz\0yuezh\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\x01\0\0\0\x02\0\0\0\x03\0\0\0\x04\0\0\0\x06\0\0\0\x08\0\0\0\t\0\0\0\n\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x11\0\0\0\x14\0\0\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ArabShawAdlmArabArabYeziArabLatnMongArabDevaKhojSindArabCyrlArabHansBopoHanbHant") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"IR\0GB\0GN\0CN\0IQ\0GE\0CN\0TR\0CN\0PK\0IN\0IN\0IN\0PK\0KZ\0AF\0CN\0TW\0TW\0TW\0") }) - }, - }; - } - #[clippy::msrv = "1.70"] - impl icu_provider::DataProvider for $provider { - fn load(&self, req: icu_provider::DataRequest) -> Result, icu_provider::DataError> { - if req.id.locale.is_und() { - Ok(icu_provider::DataResponse { payload: icu_provider::DataPayload::from_static_ref(Self::SINGLETON_LOCALE_FALLBACK_LIKELY_SUBTAGS_V1_MARKER), metadata: Default::default() }) - } else { - Err(icu_provider::DataErrorKind::InvalidRequest.with_req(::INFO, req)) - } - } - } - }; - ($ provider : ty , ITER) => { - __impl_locale_fallback_likely_subtags_v1_marker!($provider); - #[clippy::msrv = "1.70"] - impl icu_provider::IterableDataProvider for $provider { - fn iter_ids(&self) -> Result>, icu_provider::DataError> { - Ok([Default::default()].into_iter().collect()) - } - } - }; -} -#[doc(inline)] -pub use __impl_locale_fallback_likely_subtags_v1_marker as impl_locale_fallback_likely_subtags_v1_marker; diff --git a/provider/data/locale/data/mod.rs b/provider/data/locale/data/mod.rs index f7c9aec7fa7..76a53d33cd2 100644 --- a/provider/data/locale/data/mod.rs +++ b/provider/data/locale/data/mod.rs @@ -1,5 +1,4 @@ // @generated -include!("locale_fallback_likely_subtags_v1_marker.rs.data"); include!("locale_fallback_parents_v1_marker.rs.data"); include!("aliases_v2_marker.rs.data"); include!("likely_subtags_extended_v1_marker.rs.data"); @@ -35,7 +34,6 @@ pub use __make_provider as make_provider; macro_rules! impl_data_provider { ($ provider : ty) => { make_provider!($provider); - impl_locale_fallback_likely_subtags_v1_marker!($provider); impl_locale_fallback_parents_v1_marker!($provider); impl_aliases_v2_marker!($provider); impl_likely_subtags_extended_v1_marker!($provider); @@ -51,7 +49,6 @@ macro_rules! impl_any_provider { impl icu_provider::any::AnyProvider for $provider { fn load_any(&self, marker: icu_provider::DataMarkerInfo, req: icu_provider::DataRequest) -> Result { match marker.path.hashed() { - h if h == ::INFO.path.hashed() => icu_provider::DataProvider::::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response), h if h == ::INFO.path.hashed() => icu_provider::DataProvider::::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response), h if h == ::INFO.path.hashed() => icu_provider::DataProvider::::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response), h if h == ::INFO.path.hashed() => icu_provider::DataProvider::::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response), diff --git a/provider/data/locale/fingerprints.csv b/provider/data/locale/fingerprints.csv index c346ffbe7a9..a5c8eeadaed 100644 --- a/provider/data/locale/fingerprints.csv +++ b/provider/data/locale/fingerprints.csv @@ -1,4 +1,3 @@ -fallback/likelysubtags@1, , 2228B, a6ec0551218ff716 fallback/parents@1, , 2597B, 8f3b5fb7bff27d97 locid_transform/aliases@2, , 8623B, 2c1368fc48eb6158 locid_transform/likelysubtags_ext@1, , 75788B, 681a300516da7f39 diff --git a/provider/data/locale/stubdata/locale_fallback_likely_subtags_v1_marker.rs.data b/provider/data/locale/stubdata/locale_fallback_likely_subtags_v1_marker.rs.data deleted file mode 100644 index 860a96d9768..00000000000 --- a/provider/data/locale/stubdata/locale_fallback_likely_subtags_v1_marker.rs.data +++ /dev/null @@ -1,60 +0,0 @@ -// @generated -/// Implement `DataProvider` on the given struct using the data -/// hardcoded in this file. This allows the struct to be used with -/// `icu`'s `_unstable` constructors. -/// -/// Using this implementation will embed the following data in the binary's data segment: -/// * 2228B[^1] for the singleton data struct -/// -/// [^1]: these numbers can be smaller in practice due to linker deduplication -#[doc(hidden)] -#[macro_export] -macro_rules! __impl_locale_fallback_likely_subtags_v1_marker { - ($ provider : ty) => { - #[clippy::msrv = "1.70"] - const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO; - #[clippy::msrv = "1.70"] - impl $provider { - #[doc(hidden)] - pub const SINGLETON_LOCALE_FALLBACK_LIKELY_SUBTAGS_V1_MARKER: &'static ::DataStruct = &icu::locale::provider::LocaleFallbackLikelySubtagsV1 { - l2s: unsafe { - #[allow(unused_unsafe)] - zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"am\0ar\0as\0be\0bg\0bgcbhobn\0brxchrcswcv\0doiel\0fa\0gu\0he\0hi\0hy\0ja\0ka\0kk\0km\0kn\0ko\0kokks\0ky\0lo\0maimk\0ml\0mn\0mnimr\0my\0ne\0nqoor\0pa\0ps\0rajru\0sa\0sahsatsd\0si\0sr\0syrta\0te\0tg\0th\0ti\0tt\0ug\0uk\0ur\0xnryuezh\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"EthiArabBengCyrlCyrlDevaDevaBengDevaCherCansCyrlDevaGrekArabGujrHebrDevaArmnJpanGeorCyrlKhmrKndaKoreDevaArabCyrlLaooDevaCyrlMlymCyrlBengDevaMymrDevaNkooOryaGuruArabDevaCyrlDevaCyrlOlckArabSinhCyrlSyrcTamlTeluCyrlThaiEthiCyrlArabCyrlArabDevaHantHans") }) - }, - lr2s: unsafe { - #[allow(unused_unsafe)] - zerovec::ZeroMap2d::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"az\0ha\0kk\0ku\0ky\0mn\0ms\0pa\0sd\0sr\0tg\0ug\0uz\0yuezh\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\x03\0\0\0\x05\0\0\0\t\0\0\0\n\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x14\0\0\0\x15\0\0\0\x17\0\0\0\x19\0\0\0\x1A\0\0\0)\0\0\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"IQ\0IR\0RU\0CM\0SD\0AF\0CN\0IR\0MN\0LB\0CN\0TR\0CN\0CC\0PK\0IN\0ME\0RO\0RU\0TR\0PK\0KZ\0MN\0AF\0CN\0CN\0AU\0BN\0GB\0GF\0HK\0ID\0MO\0PA\0PF\0PH\0SR\0TH\0TW\0US\0VN\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ArabArabCyrlArabArabArabArabArabArabArabArabLatnMongArabArabDevaLatnLatnLatnLatnArabCyrlCyrlArabCyrlHansHantHantHantHantHantHantHantHantHantHantHantHantHantHantHant") }) - }, - l2r: unsafe { - #[allow(unused_unsafe)] - zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"af\0am\0ar\0as\0astaz\0be\0bg\0bgcbhoblobn\0br\0brxbs\0ca\0cebchrcs\0cswcv\0cy\0da\0de\0doidsbel\0en\0eo\0es\0et\0eu\0fa\0ff\0fi\0filfo\0fr\0fy\0ga\0gd\0gl\0gu\0ha\0he\0hi\0hr\0hsbhu\0hy\0ia\0id\0ie\0ig\0is\0it\0ja\0jv\0ka\0keakgpkk\0km\0kn\0ko\0kokks\0ku\0kxvky\0lb\0lijlmolo\0lt\0lv\0maimi\0mk\0ml\0mn\0mnimr\0ms\0mt\0my\0ndsne\0nl\0nn\0no\0nqooc\0or\0pa\0pcmpl\0prgps\0pt\0qu\0rajrm\0ro\0ru\0sa\0sahsatsc\0sd\0si\0sk\0sl\0so\0sq\0sr\0su\0sv\0sw\0syrszlta\0te\0tg\0th\0ti\0tk\0to\0tr\0tt\0ug\0uk\0ur\0uz\0vecvi\0vmwwo\0xh\0xnryo\0yrlyueza\0zh\0zu\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ZA\0ET\0EG\0IN\0ES\0AZ\0BY\0BG\0IN\0IN\0BJ\0BD\0FR\0IN\0BA\0ES\0PH\0US\0CZ\0CA\0RU\0GB\0DK\0DE\0IN\0DE\0GR\0US\x00001ES\0EE\0ES\0IR\0SN\0FI\0PH\0FO\0FR\0NL\0IE\0GB\0ES\0IN\0NG\0IL\0IN\0HR\0DE\0HU\0AM\x00001ID\0EE\0NG\0IS\0IT\0JP\0ID\0GE\0CV\0BR\0KZ\0KH\0IN\0KR\0IN\0IN\0TR\0IN\0KG\0LU\0IT\0IT\0LA\0LT\0LV\0IN\0NZ\0MK\0IN\0MN\0IN\0IN\0MY\0MT\0MM\0DE\0NP\0NL\0NO\0NO\0GN\0FR\0IN\0IN\0NG\0PL\0PL\0AF\0BR\0PE\0IN\0CH\0RO\0RU\0IN\0RU\0IN\0IT\0PK\0LK\0SK\0SI\0SO\0AL\0RS\0ID\0SE\0TZ\0IQ\0PL\0IN\0IN\0TJ\0TH\0ET\0TM\0TO\0TR\0RU\0CN\0UA\0PK\0UZ\0IT\0VN\0MZ\0SN\0ZA\0IN\0NG\0BR\0HK\0CN\0CN\0ZA\0") }) - }, - ls2r: unsafe { - #[allow(unused_unsafe)] - zerovec::ZeroMap2d::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"az\0en\0ff\0kk\0ku\0ky\0mn\0pa\0sd\0tg\0ug\0uz\0yuezh\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\x01\0\0\0\x02\0\0\0\x03\0\0\0\x04\0\0\0\x06\0\0\0\x08\0\0\0\t\0\0\0\n\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x11\0\0\0\x14\0\0\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ArabShawAdlmArabArabYeziArabLatnMongArabDevaKhojSindArabCyrlArabHansBopoHanbHant") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"IR\0GB\0GN\0CN\0IQ\0GE\0CN\0TR\0CN\0PK\0IN\0IN\0IN\0PK\0KZ\0AF\0CN\0TW\0TW\0TW\0") }) - }, - }; - } - #[clippy::msrv = "1.70"] - impl icu_provider::DataProvider for $provider { - fn load(&self, req: icu_provider::DataRequest) -> Result, icu_provider::DataError> { - if req.id.locale.is_und() { - Ok(icu_provider::DataResponse { payload: icu_provider::DataPayload::from_static_ref(Self::SINGLETON_LOCALE_FALLBACK_LIKELY_SUBTAGS_V1_MARKER), metadata: Default::default() }) - } else { - Err(icu_provider::DataErrorKind::InvalidRequest.with_req(::INFO, req)) - } - } - } - }; - ($ provider : ty , ITER) => { - __impl_locale_fallback_likely_subtags_v1_marker!($provider); - #[clippy::msrv = "1.70"] - impl icu_provider::IterableDataProvider for $provider { - fn iter_ids(&self) -> Result>, icu_provider::DataError> { - Ok([Default::default()].into_iter().collect()) - } - } - }; -} -#[doc(inline)] -pub use __impl_locale_fallback_likely_subtags_v1_marker as impl_locale_fallback_likely_subtags_v1_marker; diff --git a/provider/data/locale/stubdata/mod.rs b/provider/data/locale/stubdata/mod.rs index f7c9aec7fa7..76a53d33cd2 100644 --- a/provider/data/locale/stubdata/mod.rs +++ b/provider/data/locale/stubdata/mod.rs @@ -1,5 +1,4 @@ // @generated -include!("locale_fallback_likely_subtags_v1_marker.rs.data"); include!("locale_fallback_parents_v1_marker.rs.data"); include!("aliases_v2_marker.rs.data"); include!("likely_subtags_extended_v1_marker.rs.data"); @@ -35,7 +34,6 @@ pub use __make_provider as make_provider; macro_rules! impl_data_provider { ($ provider : ty) => { make_provider!($provider); - impl_locale_fallback_likely_subtags_v1_marker!($provider); impl_locale_fallback_parents_v1_marker!($provider); impl_aliases_v2_marker!($provider); impl_likely_subtags_extended_v1_marker!($provider); @@ -51,7 +49,6 @@ macro_rules! impl_any_provider { impl icu_provider::any::AnyProvider for $provider { fn load_any(&self, marker: icu_provider::DataMarkerInfo, req: icu_provider::DataRequest) -> Result { match marker.path.hashed() { - h if h == ::INFO.path.hashed() => icu_provider::DataProvider::::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response), h if h == ::INFO.path.hashed() => icu_provider::DataProvider::::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response), h if h == ::INFO.path.hashed() => icu_provider::DataProvider::::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response), h if h == ::INFO.path.hashed() => icu_provider::DataProvider::::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response), diff --git a/provider/registry/src/lib.rs b/provider/registry/src/lib.rs index e12ccda53d7..9ebb3562f88 100644 --- a/provider/registry/src/lib.rs +++ b/provider/registry/src/lib.rs @@ -79,7 +79,6 @@ macro_rules! registry( icu::locale::provider::LikelySubtagsExtendedV1Marker = "locid_transform/likelysubtags_ext@1", icu::locale::provider::LikelySubtagsForLanguageV1Marker = "locid_transform/likelysubtags_l@1", icu::locale::provider::LikelySubtagsForScriptRegionV1Marker = "locid_transform/likelysubtags_sr@1", - icu::locale::provider::LocaleFallbackLikelySubtagsV1Marker = "fallback/likelysubtags@1", icu::locale::provider::LocaleFallbackParentsV1Marker = "fallback/parents@1", icu::locale::provider::ScriptDirectionV1Marker = "locid_transform/script_dir@1", icu::normalizer::provider::CanonicalCompositionsV1Marker = "normalizer/comp@1", diff --git a/provider/source/data/debug/fallback/likelysubtags@1/und.json b/provider/source/data/debug/fallback/likelysubtags@1/und.json deleted file mode 100644 index 892716aa892..00000000000 --- a/provider/source/data/debug/fallback/likelysubtags@1/und.json +++ /dev/null @@ -1,337 +0,0 @@ -{ - "l2s": { - "am": "Ethi", - "ar": "Arab", - "as": "Beng", - "be": "Cyrl", - "bg": "Cyrl", - "bgc": "Deva", - "bho": "Deva", - "bn": "Beng", - "brx": "Deva", - "chr": "Cher", - "csw": "Cans", - "cv": "Cyrl", - "doi": "Deva", - "el": "Grek", - "fa": "Arab", - "gu": "Gujr", - "he": "Hebr", - "hi": "Deva", - "hy": "Armn", - "ja": "Jpan", - "ka": "Geor", - "kk": "Cyrl", - "km": "Khmr", - "kn": "Knda", - "ko": "Kore", - "kok": "Deva", - "ks": "Arab", - "ky": "Cyrl", - "lo": "Laoo", - "mai": "Deva", - "mk": "Cyrl", - "ml": "Mlym", - "mn": "Cyrl", - "mni": "Beng", - "mr": "Deva", - "my": "Mymr", - "ne": "Deva", - "nqo": "Nkoo", - "or": "Orya", - "pa": "Guru", - "ps": "Arab", - "raj": "Deva", - "ru": "Cyrl", - "sa": "Deva", - "sah": "Cyrl", - "sat": "Olck", - "sd": "Arab", - "si": "Sinh", - "sr": "Cyrl", - "syr": "Syrc", - "ta": "Taml", - "te": "Telu", - "tg": "Cyrl", - "th": "Thai", - "ti": "Ethi", - "tt": "Cyrl", - "ug": "Arab", - "uk": "Cyrl", - "ur": "Arab", - "xnr": "Deva", - "yue": "Hant", - "zh": "Hans" - }, - "lr2s": { - "az": { - "IQ": "Arab", - "IR": "Arab", - "RU": "Cyrl" - }, - "ha": { - "CM": "Arab", - "SD": "Arab" - }, - "kk": { - "AF": "Arab", - "CN": "Arab", - "IR": "Arab", - "MN": "Arab" - }, - "ku": { - "LB": "Arab" - }, - "ky": { - "CN": "Arab", - "TR": "Latn" - }, - "mn": { - "CN": "Mong" - }, - "ms": { - "CC": "Arab" - }, - "pa": { - "PK": "Arab" - }, - "sd": { - "IN": "Deva" - }, - "sr": { - "ME": "Latn", - "RO": "Latn", - "RU": "Latn", - "TR": "Latn" - }, - "tg": { - "PK": "Arab" - }, - "ug": { - "KZ": "Cyrl", - "MN": "Cyrl" - }, - "uz": { - "AF": "Arab", - "CN": "Cyrl" - }, - "yue": { - "CN": "Hans" - }, - "zh": { - "AU": "Hant", - "BN": "Hant", - "GB": "Hant", - "GF": "Hant", - "HK": "Hant", - "ID": "Hant", - "MO": "Hant", - "PA": "Hant", - "PF": "Hant", - "PH": "Hant", - "SR": "Hant", - "TH": "Hant", - "TW": "Hant", - "US": "Hant", - "VN": "Hant" - } - }, - "l2r": { - "af": "ZA", - "am": "ET", - "ar": "EG", - "as": "IN", - "ast": "ES", - "az": "AZ", - "be": "BY", - "bg": "BG", - "bgc": "IN", - "bho": "IN", - "blo": "BJ", - "bn": "BD", - "br": "FR", - "brx": "IN", - "bs": "BA", - "ca": "ES", - "ceb": "PH", - "chr": "US", - "cs": "CZ", - "csw": "CA", - "cv": "RU", - "cy": "GB", - "da": "DK", - "de": "DE", - "doi": "IN", - "dsb": "DE", - "el": "GR", - "en": "US", - "eo": "001", - "es": "ES", - "et": "EE", - "eu": "ES", - "fa": "IR", - "ff": "SN", - "fi": "FI", - "fil": "PH", - "fo": "FO", - "fr": "FR", - "fy": "NL", - "ga": "IE", - "gd": "GB", - "gl": "ES", - "gu": "IN", - "ha": "NG", - "he": "IL", - "hi": "IN", - "hr": "HR", - "hsb": "DE", - "hu": "HU", - "hy": "AM", - "ia": "001", - "id": "ID", - "ie": "EE", - "ig": "NG", - "is": "IS", - "it": "IT", - "ja": "JP", - "jv": "ID", - "ka": "GE", - "kea": "CV", - "kgp": "BR", - "kk": "KZ", - "km": "KH", - "kn": "IN", - "ko": "KR", - "kok": "IN", - "ks": "IN", - "ku": "TR", - "kxv": "IN", - "ky": "KG", - "lb": "LU", - "lij": "IT", - "lmo": "IT", - "lo": "LA", - "lt": "LT", - "lv": "LV", - "mai": "IN", - "mi": "NZ", - "mk": "MK", - "ml": "IN", - "mn": "MN", - "mni": "IN", - "mr": "IN", - "ms": "MY", - "mt": "MT", - "my": "MM", - "nds": "DE", - "ne": "NP", - "nl": "NL", - "nn": "NO", - "no": "NO", - "nqo": "GN", - "oc": "FR", - "or": "IN", - "pa": "IN", - "pcm": "NG", - "pl": "PL", - "prg": "PL", - "ps": "AF", - "pt": "BR", - "qu": "PE", - "raj": "IN", - "rm": "CH", - "ro": "RO", - "ru": "RU", - "sa": "IN", - "sah": "RU", - "sat": "IN", - "sc": "IT", - "sd": "PK", - "si": "LK", - "sk": "SK", - "sl": "SI", - "so": "SO", - "sq": "AL", - "sr": "RS", - "su": "ID", - "sv": "SE", - "sw": "TZ", - "syr": "IQ", - "szl": "PL", - "ta": "IN", - "te": "IN", - "tg": "TJ", - "th": "TH", - "ti": "ET", - "tk": "TM", - "to": "TO", - "tr": "TR", - "tt": "RU", - "ug": "CN", - "uk": "UA", - "ur": "PK", - "uz": "UZ", - "vec": "IT", - "vi": "VN", - "vmw": "MZ", - "wo": "SN", - "xh": "ZA", - "xnr": "IN", - "yo": "NG", - "yrl": "BR", - "yue": "HK", - "za": "CN", - "zh": "CN", - "zu": "ZA" - }, - "ls2r": { - "az": { - "Arab": "IR" - }, - "en": { - "Shaw": "GB" - }, - "ff": { - "Adlm": "GN" - }, - "kk": { - "Arab": "CN" - }, - "ku": { - "Arab": "IQ", - "Yezi": "GE" - }, - "ky": { - "Arab": "CN", - "Latn": "TR" - }, - "mn": { - "Mong": "CN" - }, - "pa": { - "Arab": "PK" - }, - "sd": { - "Deva": "IN", - "Khoj": "IN", - "Sind": "IN" - }, - "tg": { - "Arab": "PK" - }, - "ug": { - "Cyrl": "KZ" - }, - "uz": { - "Arab": "AF" - }, - "yue": { - "Hans": "CN" - }, - "zh": { - "Bopo": "TW", - "Hanb": "TW", - "Hant": "TW" - } - } -} diff --git a/provider/source/src/fallback/mod.rs b/provider/source/src/fallback/mod.rs index 75d6796e76f..9a7ed42725c 100644 --- a/provider/source/src/fallback/mod.rs +++ b/provider/source/src/fallback/mod.rs @@ -5,7 +5,6 @@ use crate::cldr_serde; use crate::SourceDataProvider; -use super::locale_canonicalizer::likely_subtags::LikelySubtagsResources; use icu::locale::provider::*; use icu::locale::{ subtags::{Language, Region, Script}, @@ -14,23 +13,7 @@ use icu::locale::{ use icu_provider::prelude::*; use std::collections::{BTreeMap, HashSet}; use writeable::Writeable; -use zerovec::{maps::ZeroMap2d, ule::UnvalidatedStr}; - -impl DataProvider for SourceDataProvider { - fn load( - &self, - req: DataRequest, - ) -> Result, DataError> { - self.check_req::(req)?; - let resources = LikelySubtagsResources::try_from_cldr_cache(self.cldr()?)?; - - let metadata = DataResponseMetadata::default(); - Ok(DataResponse { - metadata, - payload: DataPayload::from_owned(transform(resources.get_common())), - }) - } -} +use zerovec::ule::UnvalidatedStr; impl DataProvider for SourceDataProvider { fn load( @@ -51,90 +34,12 @@ impl DataProvider for SourceDataProvider { } } -impl crate::IterableDataProviderCached for SourceDataProvider { - fn iter_ids_cached(&self) -> Result>, DataError> { - Ok(HashSet::from_iter([Default::default()])) - } -} - impl crate::IterableDataProviderCached for SourceDataProvider { fn iter_ids_cached(&self) -> Result>, DataError> { Ok(HashSet::from_iter([Default::default()])) } } -fn transform<'x>( - it: impl Iterator + 'x, -) -> LocaleFallbackLikelySubtagsV1<'static> { - let mut l2s = BTreeMap::new(); - let mut lr2s = ZeroMap2d::new(); - let mut l2r = BTreeMap::new(); - let mut ls2r = ZeroMap2d::new(); - - let (part0, part1) = it - // Skip "und" for vertical fallback - .filter(|(lid, _)| !lid.language.is_empty()) - // Find language-only entries - .partition::, _>(|(lid, _)| **lid == LanguageIdentifier::from(lid.language)); - - // First collect the l2s and l2r maps - for (minimized, maximized) in part0.iter() { - let language = minimized.language; - let script = maximized.script.expect("maximized"); - let region = maximized.region.expect("maximized"); - if script != DEFAULT_SCRIPT { - l2s.insert(language.into_tinystr().to_unvalidated(), script); - } - if region != DEFAULT_REGION { - l2r.insert(language.into_tinystr().to_unvalidated(), region); - } - } - - // Now populate the other maps - for (minimized, maximized) in part1.iter() { - let language = maximized.language; - let script = maximized.script.expect("maximized"); - let region = maximized.region.expect("maximized"); - if minimized.script.is_some() { - assert!(minimized.region.is_none(), "{minimized:?}"); - let region_for_lang = l2r - .get(&language.into_tinystr().to_unvalidated()) - .copied() - .unwrap_or(DEFAULT_REGION); - if region != region_for_lang { - ls2r.insert( - &language.into_tinystr().to_unvalidated(), - &script.into_tinystr().to_unvalidated(), - ®ion, - ); - } - continue; - } - if minimized.region.is_some() { - let script_for_lang = l2s - .get(&language.into_tinystr().to_unvalidated()) - .copied() - .unwrap_or(DEFAULT_SCRIPT); - if script != script_for_lang { - lr2s.insert( - &language.into_tinystr().to_unvalidated(), - ®ion.into_tinystr().to_unvalidated(), - &script, - ); - } - continue; - } - unreachable!(); - } - - LocaleFallbackLikelySubtagsV1 { - l2s: l2s.into_iter().collect(), - lr2s, - l2r: l2r.into_iter().collect(), - ls2r, - } -} - impl From<&cldr_serde::parent_locales::Resource> for LocaleFallbackParentsV1<'static> { fn from(source_data: &cldr_serde::parent_locales::Resource) -> Self { let mut parents = BTreeMap::<_, (Language, Option