From 06bd8ce780018ac823901283a3a706a98ec121ca Mon Sep 17 00:00:00 2001 From: "Jack Horton (CHAKRA)" Date: Fri, 7 Sep 2018 16:45:12 -0700 Subject: [PATCH] Implement Intl.Locale --- lib/Parser/rterrors.h | 1 + lib/Runtime/Base/JnDirectFields.h | 2 +- .../Library/Chakra.Runtime.Library.vcxproj | 3 +- .../Chakra.Runtime.Library.vcxproj.filters | 5 +- lib/Runtime/Library/EngineInterfaceObject.cpp | 20 +- .../Library/EngineInterfaceObjectBuiltIns.h | 9 +- lib/Runtime/Library/InJavascript/Intl.js | 722 +++++++++++++++++- .../IntlEngineInterfaceExtensionObject.cpp | 3 +- test/Intl/Locale.js | 67 ++ test/Intl/rlexe.xml | 6 + 10 files changed, 812 insertions(+), 26 deletions(-) create mode 100644 test/Intl/Locale.js diff --git a/lib/Parser/rterrors.h b/lib/Parser/rterrors.h index ca429352a64..98cfaa2b4c1 100755 --- a/lib/Parser/rterrors.h +++ b/lib/Parser/rterrors.h @@ -257,6 +257,7 @@ RT_ERROR_MSG(JSERR_MissingCurrencyCode, 5123, "", "Currency code was not specifi RT_ERROR_MSG(JSERR_InvalidDate, 5124, "", "Invalid Date", kjstRangeError, 0) RT_ERROR_MSG(JSERR_IntlNotAvailable, 5125, "", "Intl is not available.", kjstTypeError, 0) RT_ERROR_MSG(JSERR_IntlNotImplemented, 5126, "", "Intl operation '%s' is not implemented.", kjstTypeError, 0) +RT_ERROR_MSG(JSERR_InvalidPrivateOrGrandfatheredTag, 5127, "", "The arguments provided to Intl.Locale form an invalid privateuse or grandfathered language tag", kjstRangeError, 0) RT_ERROR_MSG(JSERR_ArgumentOutOfRange, 5130, "%s: argument out of range", "argument out of range", kjstRangeError, 0) RT_ERROR_MSG(JSERR_ErrorOnNew, 5131, "", "Function is not a constructor", kjstTypeError, 0) diff --git a/lib/Runtime/Base/JnDirectFields.h b/lib/Runtime/Base/JnDirectFields.h index 8d9aef908b3..b716598e120 100644 --- a/lib/Runtime/Base/JnDirectFields.h +++ b/lib/Runtime/Base/JnDirectFields.h @@ -671,7 +671,6 @@ ENTRY(builtInJavascriptObjectEntryIsExtensible) ENTRY(builtInJavascriptObjectEntryKeys) 
ENTRY(builtInJavascriptObjectGetOwnPropertyDescriptor) ENTRY(builtInJavascriptObjectPreventExtensions) -ENTRY(builtInJavascriptRegExpEntryTest) // TODO(jahorto): is this needed? ENTRY(builtInJavascriptStringEntryIndexOf) ENTRY(builtInJavascriptStringEntryMatch) ENTRY(builtInJavascriptStringEntryRepeat) @@ -708,6 +707,7 @@ ENTRY(raiseOptionValueOutOfRange_3) ENTRY(raiseOptionValueOutOfRange) ENTRY(raiseThis_NullOrUndefined) ENTRY(raiseFunctionArgument_NeedFunction) +ENTRY(raiseInvalidPrivateOrGrandfatheredTag) // Promise (ChakraFull) ENTRY(Promise) diff --git a/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj b/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj index 2ad8484e353..dfb82b85d36 100644 --- a/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj +++ b/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj @@ -157,6 +157,7 @@ + @@ -326,4 +327,4 @@ - + \ No newline at end of file diff --git a/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj.filters b/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj.filters index 40decfc9e0e..315b6fa5430 100644 --- a/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj.filters +++ b/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj.filters @@ -226,6 +226,9 @@ + + + @@ -286,4 +289,4 @@ - + \ No newline at end of file diff --git a/lib/Runtime/Library/EngineInterfaceObject.cpp b/lib/Runtime/Library/EngineInterfaceObject.cpp index 344951a2b8b..6ce16f68f37 100644 --- a/lib/Runtime/Library/EngineInterfaceObject.cpp +++ b/lib/Runtime/Library/EngineInterfaceObject.cpp @@ -170,7 +170,7 @@ namespace Js // to share the common APIs without requiring everyone to access EngineInterfaceObject.Common. 
this->commonNativeInterfaces = DynamicObject::New(recycler, DynamicType::New(scriptContext, TypeIds_Object, library->GetObjectPrototype(), nullptr, - DeferredTypeHandler::GetDefaultInstance())); + DeferredTypeHandler::GetDefaultInstance())); library->AddMember(this, Js::PropertyIds::Common, this->commonNativeInterfaces); for (uint i = 0; i <= MaxEngineInterfaceExtensionKind; i++) @@ -210,15 +210,9 @@ namespace Js JavascriptLibrary* library = commonNativeInterfaces->GetScriptContext()->GetLibrary(); -#ifndef GlobalBuiltIn -#define GlobalBuiltIn(global, method) \ - library->AddFunctionToLibraryObject(commonNativeInterfaces, Js::PropertyIds::builtIn##global##method, &EngineInterfaceObject::EntryInfo::BuiltIn_##global##_##method##, 1); \ - +#define GlobalBuiltIn(global, method) library->AddFunctionToLibraryObject(commonNativeInterfaces, Js::PropertyIds::builtIn##global##method, &EngineInterfaceObject::EntryInfo::BuiltIn_##global##_##method##, 1); #define GlobalBuiltInConstructor(global) SetPropertyOn(commonNativeInterfaces, Js::PropertyIds::##global##, library->Get##global##Constructor()); - -#define BuiltInRaiseException(exceptionType, exceptionID) \ - library->AddFunctionToLibraryObject(commonNativeInterfaces, Js::PropertyIds::raise##exceptionID, &EngineInterfaceObject::EntryInfo::BuiltIn_raise##exceptionID, 1); \ - +#define BuiltInRaiseException(exceptionType, exceptionID) library->AddFunctionToLibraryObject(commonNativeInterfaces, Js::PropertyIds::raise##exceptionID, &EngineInterfaceObject::EntryInfo::BuiltIn_raise##exceptionID, 1); #define BuiltInRaiseException1(exceptionType, exceptionID) BuiltInRaiseException(exceptionType, exceptionID) #define BuiltInRaiseException2(exceptionType, exceptionID) BuiltInRaiseException(exceptionType, exceptionID) #define BuiltInRaiseException3(exceptionType, exceptionID) BuiltInRaiseException(exceptionType, exceptionID##_3) @@ -231,7 +225,7 @@ namespace Js #undef BuiltInRaiseException3 #undef GlobalBuiltIn #undef 
GlobalBuiltInConstructor -#endif + library->AddFunctionToLibraryObject(commonNativeInterfaces, Js::PropertyIds::builtInJavascriptObjectCreate, &JavascriptObject::EntryInfo::Create, 1); library->AddFunctionToLibraryObject(commonNativeInterfaces, Js::PropertyIds::builtInJavascriptObjectPreventExtensions, &JavascriptObject::EntryInfo::PreventExtensions, 1); library->AddFunctionToLibraryObject(commonNativeInterfaces, Js::PropertyIds::builtInJavascriptObjectGetOwnPropertyDescriptor, &JavascriptObject::EntryInfo::GetOwnPropertyDescriptor, 1); @@ -266,7 +260,7 @@ namespace Js int hr = Js::JavascriptConversion::ToInt32(args[1], scriptContext); int resourceId; - switch(hr) + switch (hr) { case ASYNCERR_NoErrorInErrorState: resourceId = 5200; @@ -285,7 +279,7 @@ namespace Js const int strLength = 1024; OLECHAR errorString[strLength]; - if(FGetResourceString(resourceId, errorString, strLength)) + if (FGetResourceString(resourceId, errorString, strLength)) { return Js::JavascriptString::NewCopySz(errorString, scriptContext); } @@ -440,7 +434,7 @@ namespace Js Var newVars[3]; Js::Arguments newArgs(callInfo, newVars); - for (uint i = 0; i _.arrayIndexOf(array, v) === i); }, + any(array, func) { + for (let i = 0; i < array.length; i++) { + if (func(array[i], i)) { + return true; + } + } + + return false; + }, + sort(array, sortCallback) { + for (let i = 0; i < array.length; i++) { + for (let j = i; j < array.length; j++) { + const cond = sortCallback ? 
sortCallback(array[i], array[j]) : array[i] < array[j]; + if (cond > 0) { + const temp = array[i]; + array[i] = array[j]; + array[j] = temp; + } + } + } + }, keys: platform.builtInJavascriptObjectEntryKeys, hasOwnProperty(o, prop) { return callInstanceFunc(platform.builtInJavascriptObjectEntryHasOwnProperty, o, prop); }, @@ -719,10 +740,7 @@ } const canonicalizedTag = platform.normalizeLanguageTag(tag); - if (canonicalizedTag === undefined) { - // See comment in platform.normalizeLanguageTag about when this happens - platform.raiseLocaleNotWellFormed(tag); - } else if (_.arrayIndexOf(seen, canonicalizedTag) === -1) { + if (_.arrayIndexOf(seen, canonicalizedTag) === -1) { _.push(seen, canonicalizedTag); } } @@ -2024,6 +2042,702 @@ return PluralRules; })(); + if (InitType === "Intl") { + + // Language Tag Syntax as described in RFC 5646 #section-2.1 + // Note: All language tags are comprised only of ASCII characters (makes our job easy here) + // Note: Language tags in canonical form have case conventions, but language tags are case-insensitive for our purposes + + // Note: The ABNF syntax used in RFC 5646 #section-2.1 uses the following numeric quantifier conventions: + // - (Parentheses) are used for grouping + // - PRODUCTION => exactly 1 of PRODUCTION /PRODUCTION/ + // - [PRODUCTION] => 0 or 1 of PRODUCTION /(PRODUCTION)?/ + // - #PRODUCTION => exactly # of PRODUCTION /(PRODUCTION){#}/ + // - a*bPRODUCTION (where a and b are optional) + // - *PRODUCTION => any number of PRODUCTION /(PRODUCTION)*/ + // - 1*PRODUCTION => 1 or more of PRODUCTION /(PRODUCTION)+/ + // - #*PRODUCTION => # or more of PRODUCTION /(PRODUCTION){#,}/ + // - *#PRODUCTION => 0 to # (inclusive) of PRODUCTION /(PRODUCTION){,#}/ or /(PRODUCTION){0,#}/ + // - a*bPRODUCTION => a to b (inclusive) of PRODUCTION /(PRODUCTION){a,b}/ + + const ALPHA = "[A-Z]"; + const DIGIT = "[0-9]"; + const alphanum = `(?:${ALPHA}|${DIGIT})`; + + const regularREString = 
"\\b(?:art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)\\b"; + const irregularREString = "\\b(?:en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo" + + "|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)\\b"; + const grandfatheredREString = `\\b(?:${regularREString}|${irregularREString})\\b`; + + const privateuseREString = `\\b(?:x(?:-${alphanum}{1,8}\\b)+)\\b`; // privateuse = "x" 1*("-" (1*8alphanum)) + const singletonREString = `\\b(?:${DIGIT}|[A-WY-Z])\\b`; // singleton ~= alphanum except for 'x' ; (paraphrased) + const extensionREString = `\\b(?:${singletonREString}(?:-${alphanum}{2,8})+)\\b`; // extension = singleton 1*("-" (2*8alphanum)) + const variantREString = `\\b(?:${alphanum}{5,8}|${DIGIT}${alphanum}{3})\\b`; // variant = 5*8alphanum / (DIGIT 3alphanum) + const regionREString = `\\b(?:${ALPHA}{2}|${DIGIT}{3})\\b`; // region = 2ALPHA / 3DIGIT + + const scriptREString = `\\b(?:${ALPHA}{4})\\b`; // script = 4ALPHA + const extlangREString = `\\b(?:${ALPHA}{3}\\b(?:-${ALPHA}{3}){0,2})\\b`; // extlang = 3ALPHA *2("-" 3ALPHA) + + const languageREString = '\\b(?:' + // language = + `${ALPHA}{2,3}` + // 2*3ALPHA ; shortest ISO 639 code + `\\b(?:-${extlangREString})?` + // ["-" extlang] ; sometimes followed by extended language subtags + // `|${ALPHA}{4}` + // / 4ALPHA ; or reserved for future use + // `|${ALPHA}{5,8}` + // / 5*8ALPHA ; or registered language subtag + `|${ALPHA}{4,8}` + // ~/ 4*8ALPHA ; (paraphrased: combined previous two lines) + ')\\b'; + + // Use matching groups only at the langtag level. This makes it clear what $1, $2, etc are when calling _.match + // NOTE: the leading "-" is matched for variant and extension, but not for any of the other groups. 
+ const langtagREString = `\\b(${languageREString})\\b` + // langtag = language + `\\b(?:-(${scriptREString}))?\\b` + // ["-" script] + `\\b(?:-(${regionREString}))?\\b` + // ["-" region] + `\\b((?:-${variantREString})*)\\b` + // *("-" variant) + `\\b((?:-${extensionREString})*)\\b` + // *("-" extension) + `\\b(?:-(${privateuseREString}))?\\b` ; // ["-" privateuse] + + // Use ^ and $ to enforce that the entire input string is a langtag + const langtagRE = new platform.RegExp(`^${langtagREString}$`, "i"); + const grandfatheredRE = new platform.RegExp(`^${grandfatheredREString}$`, "i"); + const privateuseRE = new platform.RegExp(`^${privateuseREString}$`, "i"); + const grandfatheredOrPrivateuseRE = new platform.RegExp(`^(?:${grandfatheredREString}|${privateuseREString})$`, "i"); + const languageOptionRE = new platform.RegExp(`^${languageREString}$`, "i"); + const scriptOptionRE = new platform.RegExp(`^${scriptREString}$`, "i"); + const regionOptionRE = new platform.RegExp(`^${regionREString}$`, "i"); + + const langtagToPartsCache = new IntlCache(); + const langtagToParts = function (langtag) { + const cached = langtagToPartsCache.get(langtag); + if (cached) { + return cached; + } + + const ret = _.create(); + let parts = _.match(langtag, langtagRE); + if (!parts) { + parts = _.match(langtag, grandfatheredRE); + if (!parts) { + parts = _.match(langtag, privateuseRE); + if (!parts) { + return null; + } else { + ret.privateuseTag = langtag; + } + } else { + ret.grandfatheredTag = langtag; + } + } + + langtagToPartsCache.set(langtag, ret); + + if (ret.privateuseTag || ret.grandfatheredTag) { + ret.isGrandfatheredOrPrivateuseTag = true; + return ret; + } + + ret.language = parts[1]; + ret.base = parts[1]; + if (parts[2]) { + ret.script = parts[2]; + ret.base += "-" + parts[2]; + } + + if (parts[3]) { + ret.region = parts[3]; + ret.base += "-" + parts[3]; + } + + if (parts[4]) { + ret.variants = parts[4]; + // leading "-" is already in parts[4] + ret.base += parts[4]; + } 
+ + if (parts[5]) { + ret.extensions = parts[5]; + + // parse the extension to find the unicode (-u) extension + const extensionParts = _.split(parts[5], "-"); + for (let unicodeExtensionStart = 0; unicodeExtensionStart < extensionParts.length; ++unicodeExtensionStart) { + if (extensionParts[unicodeExtensionStart] !== "u") { + continue; + } + + let unicodeExtensionsEnd; + for (unicodeExtensionsEnd = unicodeExtensionStart + 1; unicodeExtensionsEnd < extensionParts.length && extensionParts[unicodeExtensionsEnd].length > 1; unicodeExtensionsEnd++) { + // do nothing, we just want k to equal the index of the next element whose length is 1 + // or to equal the length of extensionParts + // We could have done this with Array.prototype.findIndex too + } + + if (unicodeExtensionsEnd > unicodeExtensionStart + 1) { + // this creates u-(keys and values)*, which is good enough for the UnicodeExtensionValue. UnicodeExtensionComponents, on the other hand, + // requires -u-keys-and-values with the leading -. UnicodeExtensionComponents knows to add "-" to the start if its missing. 
+ ret.unicodeExtension = _.join(_.slice(extensionParts, unicodeExtensionStart, unicodeExtensionsEnd), "-"); + } + + // if we have gotten this far, we have found -u-{values}, so we can break + break; + } + } + + if (parts[6]) { + ret.privateuse = parts[6]; + } + + return ret; + }; + + // no cache because IntlCache requires keys to be === rather than structurally equal + const partsToLangtag = function (parts) { + if (parts.isGrandfatheredOrPrivateuseTag) { + return parts.privateuseTag || parts.grandfatheredTag; + } + + let langtag = parts.language; + + if (parts.script) { + langtag += `-${parts.script}`; + } + + if (parts.region) { + langtag += `-${parts.region}`; + } + + if (parts.variants) { + langtag += parts.variants; + } + + if (parts.extensions) { + langtag += parts.extensions; + } + + if (parts.privateuse) { + langtag += `-${parts.privateuse}`; + } + + return langtag; + }; + + // Locale helper functions + + const getAndValidateOption = function (options, key, productionRegex, productionName) { + productionName = productionName || key; + const value = GetOption(options, key, "string", undefined, undefined); + if (value !== undefined && _.match(value, productionRegex) === null) { + platform.raiseOptionValueOutOfRange_3(value, key, `RFC5646 ${productionName}`); + } + return value; + }; + + // see steps in #sec-Intl.Locale that look like + // "If calendar does not match the (3*8alphanum) *("-" (3*8alphanum)) sequence, throw a RangeError exception." 
+ const extensionOptionRE = /^[a-z0-9]{3,8}(?:-[a-z0-9]{3,8})*$/i; + const getAndValidateExtensionOption = function (options, key) { + return getAndValidateOption(options, key, extensionOptionRE, "extension"); + }; + + const getLocaleInternalsObject = function (loc, methodName) { + if (typeof loc !== "object") { + platform.raiseNeedObjectOfType(`Intl.Locale.prototype.${methodName}`, "Intl.Locale"); + } + + const locInternals = platform.getHiddenObject(loc); + if (typeof locInternals !== "object" || !locInternals.initializedLocale) { + platform.raiseNeedObjectOfType(`Intl.Locale.prototype.${methodName}`, "Intl.Locale"); + } + + return locInternals; + }; + + const minMaxImpl = function (loc, methodName) { + const locInternals = getLocaleInternalsObject(loc, methodName); + const minimaximal = platform[`${methodName}Locale`](locInternals); + return new Locale(minimaximal); + }; + + const generateUTS35Extension = function (attributes, keywords) { + _.sort(attributes); + _.sort(keywords, (l, r) => l.key < r.key ? -1 : l.key > r.key ? 1 : 0); + _.forEach(keywords, (keyword) => { + keyword.key = _.toLowerCase(keyword.key); + if (keyword.value === "true") { + keyword.value = undefined; + } else { + keyword.value = _.toLowerCase(keyword.value); + } + }); + + let extension = ""; + _.forEach(attributes, (attr) => extension += `-${attr}`); + _.forEach(keywords, (kw) => { + extension += `-${kw.key}`; + if (kw.value) { + extension += `-${kw.value}`; + } + }); + + if (extension.length > 0) { + extension = `u${extension}`; + } + + return extension; + }; + + // End helper functions, begin abstract operations + + const ApplyOptionsToTag = function (tag, options) { + // 2. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + if (!IsStructurallyValidLanguageTag(tag)) { + platform.raiseLocaleNotWellFormed(tag); + } + + // 3. Let language be ? GetOption(options, "language", "string", undefined, undefined). + // 4. If language is not undefined, then + // a. 
If language does not match the language production, throw a RangeError exception. + // b. If language matches the grandfathered production, throw a RangeError exception. + const language = getAndValidateOption(options, "language", languageOptionRE, "language"); + if (language !== undefined && _.match(language, grandfatheredRE) !== null) { + platform.raiseInvalidPrivateOrGrandfatheredTag(); + } + + // 5. Let script be ? GetOption(options, "script", "string", undefined, undefined). + // 6. If script is not undefined, then + // a. If script does not match the script production, throw a RangeError exception. + // 7. Let region be ? GetOption(options, "region", "string", undefined, undefined). + // 8. If region is not undefined, then + // a. If region does not match the region production, throw a RangeError exception. + const script = getAndValidateOption(options, "script", scriptOptionRE, "script"); + const region = getAndValidateOption(options, "region", regionOptionRE, "region"); + + // 9. If tag matches the grandfathered production, + // a. Set tag to CanonicalizeLanguageTag(tag). + if (langtagToParts(tag).grandfatheredTag) { + tag = platform.normalizeLanguageTag(tag); + } + + // 10. If language is not undefined, + // a. If tag matches the privateuse or grandfathered production, + // i. Set tag to language. + // ii. If tag matches the grandfathered production, + // 1. Set tag to CanonicalizeLanguageTag(tag). + // b. Else, + // i. Assert: tag matches the langtag production. + // ii. Set tag to tag with the substring corresponding to the language production replaced by the string language. + if (language !== undefined) { + if (langtagToParts(tag).isGrandfatheredOrPrivateuseTag) { + tag = language; + if (langtagToParts(tag).grandfatheredTag) { + tag = platform.normalizeLanguageTag(tag); + } + } else { + const parts = langtagToParts(tag); + parts.language = language; + tag = partsToLangtag(parts); + } + } + + // 11. 
If tag matches the privateuse or grandfathered production, + // a. If script is not undefined, or if region is not undefined, throw a RangeError exception. + // 12. Else, + // a. If script is not undefined, then + // i. If tag does not contain a script production, then + // 1. Set tag to the concatenation of the language production of tag, "-", script, and the rest of tag. + // ii. Else, + // 1. Set tag to tag with the substring corresponding to the script production replaced by the string script. + // b. If region is not undefined, then + // i. If tag does not contain a region production, then + // 1. Set tag to the concatenation of the language production of tag, the substring corresponding to the "-" script production if present, "-", region, and the rest of tag. + // ii. Else, + // 1. Set tag to tag with the substring corresponding to the region production replaced by the string region. + if (langtagToParts(tag).isGrandfatheredOrPrivateuseTag) { + if (script !== undefined || region !== undefined) { + platform.raiseInvalidPrivateOrGrandfatheredTag(); + } + } else { + const langtagParts = langtagToParts(tag); + if (script !== undefined) { + langtagParts.script = script; + } + + if (region !== undefined) { + langtagParts.region = region; + } + + tag = partsToLangtag(langtagParts); + } + + // 13. Return CanonicalizeLanguageTag(tag). + return platform.normalizeLanguageTag(tag); + }; + + const ApplyUnicodeExtensionToTag = function (tag, options, relevantExtensionKeys) { + const result = _.create(); + + // 2. If tag matches the privateuse or the grandfathered production, then + // a. Let result be a new Record. + // b. Repeat for each element key of relevantExtensionKeys in List order, + // i. Set result.[[]] to undefined. + // c. Set result.[[locale]] to tag. + // d. Return result. + if (langtagToParts(tag).isGrandfatheredOrPrivateuseTag) { + result.locale = tag; + return result; + } + + // 4. 
If tag contains a substring that is a Unicode locale extension sequence, then + // a. Let extension be the String value consisting of the first substring of tag that is a Unicode locale extension sequence. + // b. Let components be ! UnicodeExtensionComponents(extension). + // c. Let attributes be components.[[Attributes]]. + // d. Let keywords be components.[[Keywords]]. + // 5. Else, + // a. Let attributes be the empty List. + // b. Let keywords be the empty List. + let attributes; + let keywords; + const unicodeExtension = langtagToParts(tag).unicodeExtension; + if (unicodeExtension !== undefined) { + const extension = unicodeExtension; + const components = UnicodeExtensionComponents(extension); + attributes = components.attributes; + keywords = components.keywords; + } else { + attributes = []; + keywords = []; + } + + // 7. Repeat for each element key of relevantExtensionKeys in List order, + for (let i = 0; i < relevantExtensionKeys.length; i++) { + // a. Let value be undefined. + // b. If keywords contains an element whose [[Key]] is the same as key, then + // i. Let entry be the element of keywords whose [[Key]] is the same as key. + // ii. Let value be entry.[[Value]]. + // c. Else + // i. Let entry be empty. + const key = relevantExtensionKeys[i]; + let value; + let entry; + for (let k = 0; k < keywords.length; k++) { + const keyword = keywords[k]; + if (keyword.key === key) { + entry = keyword; + value = keyword.value; + break; + } + } + + // e. Let optionsValue be options.[[]]. + // f. If optionsValue is not undefined, then + // i. Assert: Type(optionsValue) is String. + // ii. Let value be optionsValue. + // iii. If entry is not empty, then + // 1. Set entry.[[Value]] to value. + // iv. Else, + // 1. Append the Record{[[Key]]: key, [[Value]]: value} to keywords. 
+ const optionsValue = options[key]; + if (optionsValue !== undefined) { + value = optionsValue; + if (entry !== undefined) { + entry.value = value; + } else { + const newKeyword = _.create(); + newKeyword.key = key; + newKeyword.value = value; + _.push(keywords, newKeyword); + } + } + + // g. Set result.[[]] to value. + result[key] = value; + } + + // 8. Let locale be the String value that is tag with all Unicode locale extension sequences removed. + const langtagParts = langtagToParts(tag); + const savedExtension = langtagParts.extensions; + langtagParts.extensions = langtagParts.unicodeExtension ? _.replace(savedExtension, "-" + langtagParts.unicodeExtension, "") : savedExtension; + let locale = partsToLangtag(langtagParts); + langtagParts.extensions = savedExtension; + + // 9. Let newExtension be the canonicalized Unicode BCP 47 U Extension based on attributes and keywords as defined in UTS #35 section 3.6. + const newExtension = generateUTS35Extension(attributes, keywords); + + // 10. If newExtension is not the empty String, then + // a. Let locale be ? InsertUnicodeExtension(locale, newExtension). + if (newExtension) { + locale = InsertUnicodeExtension(locale, newExtension); + } + + // 11. Set result.[[locale]] to locale. + // 12. Return result. + result.locale = locale; + return result; + }; + + const UnicodeExtensionComponents = function (extension) { + // 1. Let attributes be the empty List. + // 2. Let keywords be the empty List. + // 3. Let isKeyword be false. + // 4. Let size be the number of elements in extension. + // 5. Let k be 3. + const attributes = []; + const keywords = []; + let isKeyword = false; + let size = extension.length; + let k = 3; + + // by starting K at 3, this algorithm expects extension to match "-u-..." with the leading "-" + // parseLangtag/langtagToParts doesn't provide that by default because the existing algorithm used + // by the rest of the constructors, UnicodeExtensionValue, expects only "u-..." 
+ // Normalize the behavior here. + if (size > 0 && extension[0] === "u") { + size++; + extension = "-" + extension; + } + + let key; + let value; + // 6. Repeat, while k < size + while (k < size) { + // a. Let e be ! Call(%StringProto_indexOf%, extension, << "-", k >>). + const e = _.stringIndexOf(extension, "-", k); + + // b. If e = -1, let len be size - k; else let len be e - k. + const len = e === -1 ? size - k : e - k; + + // c. Let subtag the String value equal to the substring of extension consisting of the code units at indices k (inclusive) through k + len (exclusive). + const subtag = _.substring(extension, k, k + len); + + // d. If isKeyword is false, then + // i. If len != 2 and subtag is not an element of attributes, then + // 1. Append subtag to attributes. + // e. Else, + if (!isKeyword) { + if (len !== 2 && _.arrayIndexOf(attributes, subtag) === -1) { + _.push(attributes, subtag); + } + } else { + // i. If len = 2, then + // 1. If keywords does not contain an element whose [[Key]] is the same as key, then + // a. Append the Record{[[Key]]: key, [[Value]]: value} to keywords. + // ii. Else, + // 1. If value is not the empty String, then + // a. Let value be the string-concatenation of value and "-". + // 2. Let value be the string-concatenation of value and subtag. + if (len === 2) { + if (!_.any(keywords, (kw) => kw.key === key)) { + const newKeyword = _.create(); + newKeyword.key = key; + newKeyword.value = value; + _.push(keywords, newKeyword); + } + } else { + if (value !== "") { + value = value + "-"; + } + + value = value + subtag; + } + } + + // f. If len = 2, then + // i. Let isKeyword be true. + // ii. Let key be subtag. + // iii. Let value be the empty String. + if (len === 2) { + isKeyword = true; + key = subtag; + value = ""; + } + + // g. Let k be k + len + 1. + k = k + len + 1; + } + + // 7. If isKeyword is true, then + // a. If keywords does not contain an element whose [[Key]] is the same as key, then + // i. 
Append the Record{[[Key]]: key, [[Value]]: value} to keywords. + if (isKeyword) { + if (!_.any(keywords, (kw) => kw.key === key)) { + const newKeyword = _.create(); + newKeyword.key = key; + newKeyword.value = value; + _.push(keywords, newKeyword); + } + } + + // 8. Return the Record{[[Attributes]]: attributes, [[Keywords]]: keywords}. + const ret = _.create(); + ret.attributes = attributes; + ret.keywords = keywords; + return ret; + }; + + const InsertUnicodeExtension = function (locale, extension) { + // 3. If locale matches the privateuse or the grandfathered production, throw a RangeError exception. + if (_.match(locale, grandfatheredOrPrivateuseRE) !== null) { + platform.raiseInvalidPrivateOrGrandfatheredTag(); + } + + // 4. Let privateIndex be ! Call(%StringProto_indexOf%, locale, << "-x-" >>). + const privateIndex = _.stringIndexOf(locale, "-x-"); + + // 5. If privateIndex = -1, then + // a. Let locale be the concatenation of locale and extension. + // 6. Else, + // a. Let preExtension be the substring of locale from position 0, inclusive, to position privateIndex, exclusive. + // b. Let postExtension be the substring of locale from position privateIndex to the end of the string. + // c. Let locale be the string-concatenation of preExtension, extension, and postExtension. + if (privateIndex === -1) { + locale = locale + "-" + extension; + } else { + const preExtension = _.substring(locale, 0, privateIndex); + const postExtension = _.substring(locale, privateIndex); + locale = preExtension + "-" + extension + postExtension; + } + + // 8. Return ! CanonicalizeLanguageTag(locale). 
+ return platform.normalizeLanguageTag(locale); + }; + + const LocalePrototype = {}; + + const Locale = tagPublicFunction("Intl.Locale", function Locale(tag, options = undefined) { + if (new.target === undefined) { + platform.raiseNeedObjectOfType("Intl.Locale", "Locale"); + } + + const relevantExtensionKeys = ["ca", "co", "nu", "hc", "kn", "kf"]; + const locale = OrdinaryCreateFromConstructor(new.target, LocalePrototype); + + const localeInternals = _.create(); + platform.setHiddenObject(locale, localeInternals); + + if (typeof tag !== "string" && (typeof tag !== "object" || tag === null)) { + platform.raiseNeedObjectOrString("tag"); + } + + if (typeof tag === "object") { + const tagStateObject = platform.getHiddenObject(tag); + if (tagStateObject && tagStateObject.initializedLocale) { + tag = tagStateObject.locale; + } else { + tag = Internal.ToString(tag); + } + } + + options = options === undefined ? _.create() : Internal.ToObject(options); + + tag = ApplyOptionsToTag(tag, options); + + const opt = _.create(); + opt.ca = getAndValidateExtensionOption(options, "calendar"); + opt.co = getAndValidateExtensionOption(options, "collation"); + opt.hc = GetOption(options, "hourCycle", "string", ["h11", "h12", "h23", "h24"], undefined); + opt.kf = GetOption(options, "caseFirst", "string", ["upper", "lower", "false"], undefined); + const kn = GetOption(options, "numeric", "boolean", undefined, undefined); + opt.kn = kn === undefined ? 
kn : Internal.ToString(kn);
+            opt.nu = getAndValidateExtensionOption(options, "numberingSystem");
+
+            const r = ApplyUnicodeExtensionToTag(tag, opt, relevantExtensionKeys);
+            // Copy the resolved tag and the per-key results (ca/co/hc/kf/kn/nu) from r onto localeInternals.
+            localeInternals.locale = r.locale;
+            localeInternals.calendar = r.ca;
+            localeInternals.collation = r.co;
+            localeInternals.hourCycle = r.hc;
+            localeInternals.caseFirst = r.kf;
+            localeInternals.numeric = r.kn;
+            localeInternals.numberingSystem = r.nu;
+            // langtagToParts supplies the language/script/region/base pieces read below (presumably by splitting the tag).
+            const localeParts = langtagToParts(r.locale);
+            localeInternals.language = localeParts.language;
+            localeInternals.script = localeParts.script;
+            localeInternals.region = localeParts.region;
+            localeInternals.baseName = localeParts.base;
+
+            localeInternals.initializedLocale = true;
+
+            return locale;
+        });
+        // Locale.prototype is locked down: not writable, not enumerable, not configurable.
+        _.defineProperty(Locale, "prototype", {
+            value: LocalePrototype,
+            writable: false,
+            enumerable: false,
+            configurable: false,
+        });
+        // prototype.constructor points back at Locale and stays writable and configurable.
+        _.defineProperty(LocalePrototype, "constructor", {
+            value: Locale,
+            writable: true,
+            enumerable: false,
+            configurable: true
+        });
+        // Symbol.toStringTag makes Object.prototype.toString report "[object Intl.Locale]" for instances.
+        _.defineProperty(LocalePrototype, Symbol.toStringTag, {
+            value: "Intl.Locale",
+            writable: false,
+            enumerable: false,
+            configurable: true
+        });
+        // maximize() and minimize() both delegate to minMaxImpl; the second argument selects the operation.
+        _.defineProperty(LocalePrototype, "maximize", {
+            value: createPublicMethod("Intl.Locale.prototype.maximize", function maximize() {
+                return minMaxImpl(this, "maximize");
+            }),
+            writable: true,
+            enumerable: false,
+            configurable: true
+        });
+
+        _.defineProperty(LocalePrototype, "minimize", {
+            value: createPublicMethod("Intl.Locale.prototype.minimize", function minimize() {
+                return minMaxImpl(this, "minimize");
+            }),
+            writable: true,
+            enumerable: false,
+            configurable: true
+        });
+        // toString() returns the locale field of whatever getLocaleInternalsObject yields for `this`.
+        _.defineProperty(LocalePrototype, "toString", {
+            value: createPublicMethod("Intl.Locale.prototype.toString", function toString() {
+                return getLocaleInternalsObject(this, "toString").locale;
+            }),
+            writable: true,
+            enumerable: false,
+            configurable: true,
+        });
+        // createGetter wraps uniqueFunctionDecl via createPublicMethod, names it "get <key>", and installs it as an accessor.
+        const createGetter = function (key, uniqueFunctionDecl) {
+            const getKey = `get ${key}`;
+            const getter = createPublicMethod(getKey, uniqueFunctionDecl);
+            _.defineProperty(getter, "name", {
+                value: getKey,
+                writable: false,
+                enumerable: false,
+                configurable: true,
+            });
+            _.defineProperty(LocalePrototype, key, {
+                get: getter,
+                enumerable: false,
+                configurable: true,
+            });
+        };
+
+        // duplicate the function for each rather than parameterizing the function in createGetter because of the displayName
+        // hacks we do in createPublicMethod (see explanation of Date.prototype.toLocale{Date|Time}String)
+        createGetter("calendar", function () { return getLocaleInternalsObject(this, "calendar").calendar; });
+        createGetter("baseName", function () { return getLocaleInternalsObject(this, "baseName").baseName; });
+        createGetter("collation", function () { return getLocaleInternalsObject(this, "collation").collation; });
+        createGetter("hourCycle", function () { return getLocaleInternalsObject(this, "hourCycle").hourCycle; });
+        createGetter("caseFirst", function () { return getLocaleInternalsObject(this, "caseFirst").caseFirst; });
+        createGetter("numeric", function () { return getLocaleInternalsObject(this, "numeric").numeric; });
+        createGetter("numberingSystem", function () { return getLocaleInternalsObject(this, "numberingSystem").numberingSystem; });
+        createGetter("language", function () { return getLocaleInternalsObject(this, "language").language; });
+        createGetter("script", function () { return getLocaleInternalsObject(this, "script").script; });
+        createGetter("region", function () { return getLocaleInternalsObject(this, "region").region; });
+        // Publish the constructor as Intl.Locale (writable, non-enumerable, configurable).
+        _.defineProperty(Intl, "Locale", { value: Locale, writable: true, enumerable: false, configurable: true });
+    }
+
     // Initialize Intl properties only if needed
     if (InitType === "Intl") {
         _.defineProperty(Intl, "Collator", { value: Collator, writable: true, enumerable: false, configurable: true });
diff --git a/lib/Runtime/Library/IntlEngineInterfaceExtensionObject.cpp
b/lib/Runtime/Library/IntlEngineInterfaceExtensionObject.cpp index 6c6c6905d81..9801de6cbeb 100644 --- a/lib/Runtime/Library/IntlEngineInterfaceExtensionObject.cpp +++ b/lib/Runtime/Library/IntlEngineInterfaceExtensionObject.cpp @@ -944,8 +944,7 @@ PROJECTED_ENUMS(PROJECTED_ENUM) // of caution and say it is invalid. // We also check for parsedLength < langtag->GetLength() because there are cases when status == U_ZERO_ERROR // but the langtag was not valid, such as "en-tesTER-TESter" (OSS-Fuzz #6657). - // NOTE: make sure we check for `undefined` at the platform.normalizeLanguageTag callsite. - return scriptContext->GetLibrary()->GetUndefined(); + JavascriptError::ThrowRangeError(scriptContext, JSERR_LocaleNotWellFormed, langtag); } // forLangTagResultLength can be 0 if langtag is "und". diff --git a/test/Intl/Locale.js b/test/Intl/Locale.js new file mode 100644 index 00000000000..6d8e0cd6ed0 --- /dev/null +++ b/test/Intl/Locale.js @@ -0,0 +1,67 @@ +//------------------------------------------------------------------------------------------------------- +// Copyright (C) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. 
+//-------------------------------------------------------------------------------------------------------
+
+WScript.LoadScriptFile("..\\UnitTestFramework\\UnitTestFramework.js");
+// Exercises the Intl.Locale constructor, toString(), and the subtag/extension accessors.
+testRunner.runTests([
+    {
+        name: "Basic functionality",
+        body() {
+            assert.areEqual("Locale", Intl.Locale.name);
+            const locale = new Intl.Locale("en");
+            assert.areEqual("en", locale.toString());
+            assert.areEqual("[object Intl.Locale]", Object.prototype.toString.call(locale));
+        },
+    },
+    {
+        name: "Applying options",
+        body() {
+            function test(expected, langtag, options) {
+                const locale = new Intl.Locale(langtag, options);
+                assert.areEqual(expected, locale.toString());
+            }
+            // Each option value is expected to replace the matching subtag or -u- keyword of the tag argument.
+            test("es", "es");
+            test("de", "en", { language: "de" });
+            test("de-Latn-DE-u-ca-chinese", "en", { language: "de", script: "Latn", region: "DE", calendar: "chinese" });
+            test("ar-u-co-unihan", "ar-u-co-unihan");
+            test("ar-u-co-unihan", "ar", { collation: "unihan" });
+        }
+    },
+    {
+        name: "Using an existing Locale object for the langtag argument",
+        body() {
+            const enUS = new Intl.Locale("en-US");
+            assert.areEqual("en", enUS.language);
+            assert.areEqual("US", enUS.region);
+            // A Locale instance is passed as the tag argument; options override individual parts of it.
+            const enGB = new Intl.Locale(enUS, { region: "GB" });
+            assert.areEqual("en", enGB.language);
+            assert.areEqual("GB", enGB.region);
+
+            const deGB = new Intl.Locale(enGB, { language: "de" });
+            assert.areEqual("de", deGB.language);
+            assert.areEqual("GB", deGB.region);
+
+            const deLatnGB = new Intl.Locale(deGB, { script: "Latn" });
+            assert.areEqual("de", deLatnGB.language);
+            assert.areEqual("GB", deLatnGB.region);
+            assert.areEqual("Latn", deLatnGB.script);
+
+            const dePhonebk = new Intl.Locale("de-u-co-phonebk");
+            assert.areEqual("de", dePhonebk.language);
+            assert.areEqual("phonebk", dePhonebk.collation);
+
+            const deUnihan = new Intl.Locale(dePhonebk, { collation: "unihan" });
+            assert.areEqual("de", deUnihan.language);
+            assert.areEqual("unihan", deUnihan.collation);
+            // Language is replaced and hourCycle added; the collation carried by deUnihan must survive.
+            const esUnihanH24 = new Intl.Locale(deUnihan, { language: "es", hourCycle: "h24" });
+            assert.areEqual("es", esUnihanH24.language);
+            assert.areEqual("unihan", esUnihanH24.collation);
+            assert.areEqual("h24", esUnihanH24.hourCycle);
+        }
+    },
+], { verbose: false });
diff --git a/test/Intl/rlexe.xml b/test/Intl/rlexe.xml
index dd9502b4e4f..606de788dfb 100644
--- a/test/Intl/rlexe.xml
+++ b/test/Intl/rlexe.xml
@@ -118,6 +118,12 @@
 Intl,exclude_windows
+
+
+ Locale.js
+ Intl,exclude_windows
+
+