/root/firefox-clang/intl/icu/source/common/locid.cpp

Bug Summary

File:	root/firefox-clang/intl/icu/source/common/locid.cpp
Warning:	line 1905, column 9 Value stored to 'separator' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name locid.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/config/gcc_hidden.h -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -D U_COMMON_IMPLEMENTATION -D _LIBCPP_DISABLE_DEPRECATION_WARNINGS -D U_USING_ICU_NAMESPACE=0 -D U_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -D U_HIDE_OBSOLETE_UTF_OLD_H=1 -D UCONFIG_NO_LEGACY_CONVERSION -D UCONFIG_NO_TRANSLITERATION -D UCONFIG_NO_REGULAR_EXPRESSIONS -D UCONFIG_NO_BREAK_ITERATION -D UCONFIG_NO_IDNA -D UCONFIG_NO_MF2 -D U_CHARSET_IS_UTF8 -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D U_ENABLE_DYLOAD=0 -D U_DEBUG=1 -I 
/root/firefox-clang/config/external/icu/common -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -I /root/firefox-clang/intl/icu/source/i18n -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=pessimizing-move -Wno-error=large-by-value-copy=128 -Wno-error=implicit-int-float-conversion -Wno-error=thread-safety-analysis -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-comma -Wno-implicit-const-int-float-conversion -Wno-macro-redefined -Wno-microsoft-include -Wno-tautological-unsigned-enum-zero-compare -Wno-unreachable-code-loop-increment -Wno-unreachable-code-return -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fno-sized-deallocation -fno-aligned-allocation 
-vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-30-093548-1913035-1 -x c++ /root/firefox-clang/intl/icu/source/common/locid.cpp

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1997-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* File locid.cpp
10	*
11	* Created by: Richard Gillam
12	*
13	* Modification History:
14	*
15	* Date Name Description
16	* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
17	* methods to get and set it.
18	* 04/02/97 aliu Made operator!= inline; fixed return value
19	* of getName().
20	* 04/15/97 aliu Cleanup for AIX/Win32.
21	* 04/24/97 aliu Numerous changes per code review.
22	* 08/18/98 stephen Changed getDisplayName()
23	* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
24	* Added getISOCountries(), getISOLanguages(),
25	* getLanguagesForCountry()
26	* 03/16/99 bertrand rehaul.
27	* 07/21/99 stephen Added U_CFUNC setDefault
28	* 11/09/99 weiv Added const char * getName() const;
29	* 04/12/00 srl removing unicodestring api's and cached hash code
30	* 08/10/01 grhoten Change the static Locales to accessor functions
31	******************************************************************************
32	*/
33
34	#include <optional>
35	#include <string_view>
36	#include <utility>
37
38	#include "unicode/bytestream.h"
39	#include "unicode/locid.h"
40	#include "unicode/localebuilder.h"
41	#include "unicode/strenum.h"
42	#include "unicode/stringpiece.h"
43	#include "unicode/uloc.h"
44	#include "unicode/ures.h"
45
46	#include "bytesinkutil.h"
47	#include "charstr.h"
48	#include "charstrmap.h"
49	#include "cmemory.h"
50	#include "cstring.h"
51	#include "mutex.h"
52	#include "putilimp.h"
53	#include "uassert.h"
54	#include "ucln_cmn.h"
55	#include "uhash.h"
56	#include "ulocimp.h"
57	#include "umutex.h"
58	#include "uniquecharstr.h"
59	#include "ustr_imp.h"
60	#include "uvector.h"
61
62	U_NAMESPACE_BEGINnamespace icu_77 {
63
64	static Locale *gLocaleCache = nullptr;
65	static UInitOnce gLocaleCacheInitOnce {};
66
67	// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
68	static UMutex gDefaultLocaleMutex;
69	static UHashtable *gDefaultLocalesHashT = nullptr;
70	static Locale *gDefaultLocale = nullptr;
71
72	/**
73	* \def ULOC_STRING_LIMIT
74	* strings beyond this value crash in CharString
75	*/
76	#define ULOC_STRING_LIMIT357913941 357913941
77
78	U_NAMESPACE_END}
79
/**
 * Index of each predefined locale inside gLocaleCache.
 * Language-only entries come first, followed by language+country entries and
 * the root locale; eMAX_LOCALES is the number of entries and must stay last.
 */
typedef enum ELocalePos {
    eENGLISH,
    eFRENCH,
    eGERMAN,
    eITALIAN,
    eJAPANESE,
    eKOREAN,
    eCHINESE,

    eFRANCE,
    eGERMANY,
    eITALY,
    eJAPAN,
    eKOREA,
    eCHINA,      /* Alias for PRC */
    eTAIWAN,
    eUK,
    eUS,
    eCANADA,
    eCANADA_FRENCH,
    eROOT,


    //eDEFAULT,
    eMAX_LOCALES
} ELocalePos;
106
107	namespace {
108
109	//
110	// Deleter function for Locales owned by the default Locale hash table/
111	//
112	void U_CALLCONV
113	deleteLocale(void *obj) {
114	delete static_cast<icu::Locale*>(obj);
115	}
116
117	UBool U_CALLCONV locale_cleanup()
118	{
119	U_NAMESPACE_USEusing namespace icu_77;
120
121	delete [] gLocaleCache;
122	gLocaleCache = nullptr;
123	gLocaleCacheInitOnce.reset();
124
125	if (gDefaultLocalesHashT) {
126	uhash_closeuhash_close_77(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
127	gDefaultLocalesHashT = nullptr;
128	}
129	gDefaultLocale = nullptr;
130	return true;
131	}
132
133	void U_CALLCONV locale_init(UErrorCode &status) {
134	U_NAMESPACE_USEusing namespace icu_77;
135
136	U_ASSERT(gLocaleCache == nullptr)(static_cast <bool> (gLocaleCache == nullptr) ? void (0 ) : __assert_fail ("gLocaleCache == nullptr", __builtin_FILE ( ), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
137	gLocaleCache = new Locale[static_cast<int>(eMAX_LOCALES)];
138	if (gLocaleCache == nullptr) {
139	status = U_MEMORY_ALLOCATION_ERROR;
140	return;
141	}
142	ucln_common_registerCleanupucln_common_registerCleanup_77(UCLN_COMMON_LOCALE, locale_cleanup);
143	gLocaleCache[eROOT] = Locale("");
144	gLocaleCache[eENGLISH] = Locale("en");
145	gLocaleCache[eFRENCH] = Locale("fr");
146	gLocaleCache[eGERMAN] = Locale("de");
147	gLocaleCache[eITALIAN] = Locale("it");
148	gLocaleCache[eJAPANESE] = Locale("ja");
149	gLocaleCache[eKOREAN] = Locale("ko");
150	gLocaleCache[eCHINESE] = Locale("zh");
151	gLocaleCache[eFRANCE] = Locale("fr", "FR");
152	gLocaleCache[eGERMANY] = Locale("de", "DE");
153	gLocaleCache[eITALY] = Locale("it", "IT");
154	gLocaleCache[eJAPAN] = Locale("ja", "JP");
155	gLocaleCache[eKOREA] = Locale("ko", "KR");
156	gLocaleCache[eCHINA] = Locale("zh", "CN");
157	gLocaleCache[eTAIWAN] = Locale("zh", "TW");
158	gLocaleCache[eUK] = Locale("en", "GB");
159	gLocaleCache[eUS] = Locale("en", "US");
160	gLocaleCache[eCANADA] = Locale("en", "CA");
161	gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
162	}
163
164	} // namespace
165
166	U_NAMESPACE_BEGINnamespace icu_77 {
167
168	Locale locale_set_default_internal(const char id, UErrorCode& status) {
169	// Synchronize this entire function.
170	Mutex lock(&gDefaultLocaleMutex);
171
172	UBool canonicalize = false;
173
174	// If given a nullptr string for the locale id, grab the default
175	// name from the system.
176	// (Different from most other locale APIs, where a null name means use
177	// the current ICU default locale.)
178	if (id == nullptr) {
179	id = uprv_getDefaultLocaleIDuprv_getDefaultLocaleID_77(); // This function not thread safe? TODO: verify.
180	canonicalize = true; // always canonicalize host ID
181	}
182
183	CharString localeNameBuf =
184	canonicalize ? ulocimp_canonicalizeulocimp_canonicalize_77(id, status) : ulocimp_getNameulocimp_getName_77(id, status);
185
186	if (U_FAILURE(status)) {
187	return gDefaultLocale;
188	}
189
190	if (gDefaultLocalesHashT == nullptr) {
191	gDefaultLocalesHashT = uhash_openuhash_open_77(uhash_hashCharsuhash_hashChars_77, uhash_compareCharsuhash_compareChars_77, nullptr, &status);
192	if (U_FAILURE(status)) {
193	return gDefaultLocale;
194	}
195	uhash_setValueDeleteruhash_setValueDeleter_77(gDefaultLocalesHashT, deleteLocale);
196	ucln_common_registerCleanupucln_common_registerCleanup_77(UCLN_COMMON_LOCALE, locale_cleanup);
197	}
198
199	Locale* newDefault = static_cast<Locale*>(uhash_getuhash_get_77(gDefaultLocalesHashT, localeNameBuf.data()));
200	if (newDefault == nullptr) {
201	newDefault = new Locale(Locale::eBOGUS);
202	if (newDefault == nullptr) {
203	status = U_MEMORY_ALLOCATION_ERROR;
204	return gDefaultLocale;
205	}
206	newDefault->init(localeNameBuf.data(), false);
207	uhash_putuhash_put_77(gDefaultLocalesHashT, const_cast<char*>(newDefault->getName()), newDefault, &status);
208	if (U_FAILURE(status)) {
209	return gDefaultLocale;
210	}
211	}
212	gDefaultLocale = newDefault;
213	return gDefaultLocale;
214	}
215
216	U_NAMESPACE_END}
217
218	/* sfb 07/21/99 */
219	U_CFUNCextern "C" void
220	locale_set_defaultlocale_set_default_77(const char *id)
221	{
222	U_NAMESPACE_USEusing namespace icu_77;
223	UErrorCode status = U_ZERO_ERROR;
224	locale_set_default_internal(id, status);
225	}
226	/* end */
227
228	U_CFUNCextern "C" const char *
229	locale_get_defaultlocale_get_default_77()
230	{
231	U_NAMESPACE_USEusing namespace icu_77;
232	return Locale::getDefault().getName();
233	}
234
235
236	U_NAMESPACE_BEGINnamespace icu_77 {
237
238	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)UClassID Locale::getStaticClassID() { static char classID = 0 ; return (UClassID)&classID; } UClassID Locale::getDynamicClassID () const { return Locale::getStaticClassID(); }
239
240	/Character separating the posix id fields/
241	// '_'
242	// In the platform codepage.
243	#define SEP_CHAR'_' '_'
244	#define NULL_CHAR'\0' '\0'
245
246	Locale::~Locale()
247	{
248	if ((baseName != fullName) && (baseName != fullNameBuffer)) {
249	uprv_freeuprv_free_77(baseName);
250	}
251	baseName = nullptr;
252	/if fullName is on the heap, we free it/
253	if (fullName != fullNameBuffer)
254	{
255	uprv_freeuprv_free_77(fullName);
256	fullName = nullptr;
257	}
258	}
259
260	Locale::Locale()
261	: UObject(), fullName(fullNameBuffer), baseName(nullptr)
262	{
263	init(nullptr, false);
264	}
265
266	/*
267	* Internal constructor to allow construction of a locale object with
268	* NO side effects. (Default constructor tries to get
269	* the default locale.)
270	*/
271	Locale::Locale(Locale::ELocaleType)
272	: UObject(), fullName(fullNameBuffer), baseName(nullptr)
273	{
274	setToBogus();
275	}
276
277
278	Locale::Locale( const char * newLanguage,
279	const char * newCountry,
280	const char * newVariant,
281	const char * newKeywords)
282	: UObject(), fullName(fullNameBuffer), baseName(nullptr)
283	{
284	if( (newLanguage==nullptr) && (newCountry == nullptr) && (newVariant == nullptr) )
285	{
286	init(nullptr, false); /* shortcut */
287	}
288	else
289	{
290	UErrorCode status = U_ZERO_ERROR;
291	int32_t lsize = 0;
292	int32_t csize = 0;
293	int32_t vsize = 0;
294	int32_t ksize = 0;
295
296	// Check the sizes of the input strings.
297
298	// Language
299	if ( newLanguage != nullptr )
300	{
301	lsize = static_cast<int32_t>(uprv_strlen(newLanguage):: strlen(newLanguage));
302	if ( lsize < 0 \|\| lsize > ULOC_STRING_LIMIT357913941 ) { // int32 wrap
303	setToBogus();
304	return;
305	}
306	}
307
308	CharString togo(newLanguage, lsize, status); // start with newLanguage
309
310	// _Country
311	if ( newCountry != nullptr )
312	{
313	csize = static_cast<int32_t>(uprv_strlen(newCountry):: strlen(newCountry));
314	if ( csize < 0 \|\| csize > ULOC_STRING_LIMIT357913941 ) { // int32 wrap
315	setToBogus();
316	return;
317	}
318	}
319
320	// _Variant
321	if ( newVariant != nullptr )
322	{
323	// remove leading _'s
324	while(newVariant[0] == SEP_CHAR'_')
325	{
326	newVariant++;
327	}
328
329	// remove trailing _'s
330	vsize = static_cast<int32_t>(uprv_strlen(newVariant):: strlen(newVariant));
331	if ( vsize < 0 \|\| vsize > ULOC_STRING_LIMIT357913941 ) { // int32 wrap
332	setToBogus();
333	return;
334	}
335	while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR'_') )
336	{
337	vsize--;
338	}
339	}
340
341	if ( newKeywords != nullptr)
342	{
343	ksize = static_cast<int32_t>(uprv_strlen(newKeywords):: strlen(newKeywords));
344	if ( ksize < 0 \|\| ksize > ULOC_STRING_LIMIT357913941 ) {
345	setToBogus();
346	return;
347	}
348	}
349
350	// We've checked the input sizes, now build up the full locale string..
351
352	// newLanguage is already copied
353
354	if ( ( vsize != 0 ) \|\| (csize != 0) ) // at least: __v
355	{ // ^
356	togo.append(SEP_CHAR'_', status);
357	}
358
359	if ( csize != 0 )
360	{
361	togo.append(newCountry, status);
362	}
363
364	if ( vsize != 0)
365	{
366	togo.append(SEP_CHAR'_', status)
367	.append(newVariant, vsize, status);
368	}
369
370	if ( ksize != 0)
371	{
372	if (uprv_strchr(newKeywords, '='):: strchr(newKeywords, '=')) {
373	togo.append('@', status); /* keyword parsing */
374	}
375	else {
376	togo.append('_', status); /* Variant parsing with a script */
377	if ( vsize == 0) {
378	togo.append('_', status); /* No country found */
379	}
380	}
381	togo.append(newKeywords, status);
382	}
383
384	if (U_FAILURE(status)) {
385	// Something went wrong with appending, etc.
386	setToBogus();
387	return;
388	}
389	// Parse it, because for example 'language' might really be a complete
390	// string.
391	init(togo.data(), false);
392	}
393	}
394
395	Locale::Locale(const Locale &other)
396	: UObject(other), fullName(fullNameBuffer), baseName(nullptr)
397	{
398	*this = other;
399	}
400
401	Locale::Locale(Locale&& other) noexcept
402	: UObject(other), fullName(fullNameBuffer), baseName(fullName) {
403	*this = std::move(other);
404	}
405
406	Locale& Locale::operator=(const Locale& other) {
407	if (this == &other) {
408	return *this;
409	}
410
411	setToBogus();
412
413	if (other.fullName == other.fullNameBuffer) {
414	uprv_strcpy(fullNameBuffer, other.fullNameBuffer):: strcpy(fullNameBuffer, other.fullNameBuffer);
415	} else if (other.fullName == nullptr) {
416	fullName = nullptr;
417	} else {
418	fullName = uprv_strdupuprv_strdup_77(other.fullName);
419	if (fullName == nullptr) return *this;
420	}
421
422	if (other.baseName == other.fullName) {
423	baseName = fullName;
424	} else if (other.baseName != nullptr) {
425	baseName = uprv_strdupuprv_strdup_77(other.baseName);
426	if (baseName == nullptr) return *this;
427	}
428
429	uprv_strcpy(language, other.language):: strcpy(language, other.language);
430	uprv_strcpy(script, other.script):: strcpy(script, other.script);
431	uprv_strcpy(country, other.country):: strcpy(country, other.country);
432
433	variantBegin = other.variantBegin;
434	fIsBogus = other.fIsBogus;
435
436	return *this;
437	}
438
439	Locale& Locale::operator=(Locale&& other) noexcept {
440	if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_freeuprv_free_77(baseName);
441	if (fullName != fullNameBuffer) uprv_freeuprv_free_77(fullName);
442
443	if (other.fullName == other.fullNameBuffer \|\| other.baseName == other.fullNameBuffer) {
444	uprv_strcpy(fullNameBuffer, other.fullNameBuffer):: strcpy(fullNameBuffer, other.fullNameBuffer);
445	}
446	if (other.fullName == other.fullNameBuffer) {
447	fullName = fullNameBuffer;
448	} else {
449	fullName = other.fullName;
450	}
451
452	if (other.baseName == other.fullNameBuffer) {
453	baseName = fullNameBuffer;
454	} else if (other.baseName == other.fullName) {
455	baseName = fullName;
456	} else {
457	baseName = other.baseName;
458	}
459
460	uprv_strcpy(language, other.language):: strcpy(language, other.language);
461	uprv_strcpy(script, other.script):: strcpy(script, other.script);
462	uprv_strcpy(country, other.country):: strcpy(country, other.country);
463
464	variantBegin = other.variantBegin;
465	fIsBogus = other.fIsBogus;
466
467	other.baseName = other.fullName = other.fullNameBuffer;
468
469	return *this;
470	}
471
472	Locale *
473	Locale::clone() const {
474	return new Locale(*this);
475	}
476
477	bool
478	Locale::operator==( const Locale& other) const
479	{
480	return (uprv_strcmp(other.fullName, fullName):: strcmp(other.fullName, fullName) == 0);
481	}
482
483	namespace {
484
485	UInitOnce gKnownCanonicalizedInitOnce {};
486	UHashtable *gKnownCanonicalized = nullptr;
487
// Locale ids that are inserted as keys into gKnownCanonicalized.
constexpr const char* KNOWN_CANONICALIZED[] = {
    "c",
    // Commonly used locales known are already canonicalized
    "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
    "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES",
    "cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR",
    "en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu",
    "eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
    "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN",
    "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS",
    "it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ",
    "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
    "lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
    "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
    "nl", "nl_NL", "no", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
    "pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
    "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
    "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
    "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk",
    "uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant",
    "yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant",
    "zh_Hant_TW", "zh_TW", "zu", "zu_ZA"
};
511
512	UBool U_CALLCONV cleanupKnownCanonicalized() {
513	gKnownCanonicalizedInitOnce.reset();
514	if (gKnownCanonicalized) { uhash_closeuhash_close_77(gKnownCanonicalized); }
515	return true;
516	}
517
518	void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
519	ucln_common_registerCleanupucln_common_registerCleanup_77(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
520	cleanupKnownCanonicalized);
521	LocalUHashtablePointer newKnownCanonicalizedMap(
522	uhash_openuhash_open_77(uhash_hashCharsuhash_hashChars_77, uhash_compareCharsuhash_compareChars_77, nullptr, &status));
523	for (int32_t i = 0;
524	U_SUCCESS(status) && i < UPRV_LENGTHOF(KNOWN_CANONICALIZED)(int32_t)(sizeof(KNOWN_CANONICALIZED)/sizeof((KNOWN_CANONICALIZED )[0]));
525	i++) {
526	uhash_putiuhash_puti_77(newKnownCanonicalizedMap.getAlias(),
527	(void*)KNOWN_CANONICALIZED[i],
528	1, &status);
529	}
530	if (U_FAILURE(status)) {
531	return;
532	}
533
534	gKnownCanonicalized = newKnownCanonicalizedMap.orphan();
535	}
536
537	class AliasData;
538
539	/**
540	* A Builder class to build the alias data.
541	*/
542	class AliasDataBuilder {
543	public:
544	AliasDataBuilder() {
545	}
546
547	// Build the AliasData from resource.
548	AliasData* build(UErrorCode &status);
549
550	private:
551	void readAlias(UResourceBundle* alias,
552	UniqueCharStrings* strings,
553	LocalMemory<const char*>& types,
554	LocalMemory<int32_t>& replacementIndexes,
555	int32_t &length,
556	void (checkType)(const char type),
557	void (checkReplacement)(const UChar replacement),
558	UErrorCode &status);
559
560	// Read the languageAlias data from alias to
561	// strings+types+replacementIndexes
562	// The number of record will be stored into length.
563	// Allocate length items for types, to store the type field.
564	// Allocate length items for replacementIndexes,
565	// to store the index in the strings for the replacement script.
566	void readLanguageAlias(UResourceBundle* alias,
567	UniqueCharStrings* strings,
568	LocalMemory<const char*>& types,
569	LocalMemory<int32_t>& replacementIndexes,
570	int32_t &length,
571	UErrorCode &status);
572
573	// Read the scriptAlias data from alias to
574	// strings+types+replacementIndexes
575	// Allocate length items for types, to store the type field.
576	// Allocate length items for replacementIndexes,
577	// to store the index in the strings for the replacement script.
578	void readScriptAlias(UResourceBundle* alias,
579	UniqueCharStrings* strings,
580	LocalMemory<const char*>& types,
581	LocalMemory<int32_t>& replacementIndexes,
582	int32_t &length, UErrorCode &status);
583
584	// Read the territoryAlias data from alias to
585	// strings+types+replacementIndexes
586	// Allocate length items for types, to store the type field.
587	// Allocate length items for replacementIndexes,
588	// to store the index in the strings for the replacement script.
589	void readTerritoryAlias(UResourceBundle* alias,
590	UniqueCharStrings* strings,
591	LocalMemory<const char*>& types,
592	LocalMemory<int32_t>& replacementIndexes,
593	int32_t &length, UErrorCode &status);
594
595	// Read the variantAlias data from alias to
596	// strings+types+replacementIndexes
597	// Allocate length items for types, to store the type field.
598	// Allocate length items for replacementIndexes,
599	// to store the index in the strings for the replacement variant.
600	void readVariantAlias(UResourceBundle* alias,
601	UniqueCharStrings* strings,
602	LocalMemory<const char*>& types,
603	LocalMemory<int32_t>& replacementIndexes,
604	int32_t &length, UErrorCode &status);
605
606	// Read the subdivisionAlias data from alias to
607	// strings+types+replacementIndexes
608	// Allocate length items for types, to store the type field.
609	// Allocate length items for replacementIndexes,
610	// to store the index in the strings for the replacement variant.
611	void readSubdivisionAlias(UResourceBundle* alias,
612	UniqueCharStrings* strings,
613	LocalMemory<const char*>& types,
614	LocalMemory<int32_t>& replacementIndexes,
615	int32_t &length, UErrorCode &status);
616	};
617
618	/**
619	* A class to hold the Alias Data.
620	*/
621	class AliasData : public UMemory {
622	public:
623	static const AliasData* singleton(UErrorCode& status) {
624	if (U_FAILURE(status)) {
625	// Do not get into loadData if the status already has error.
626	return nullptr;
627	}
628	umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status);
629	return gSingleton;
630	}
631
632	const CharStringMap& languageMap() const { return language; }
633	const CharStringMap& scriptMap() const { return script; }
634	const CharStringMap& territoryMap() const { return territory; }
635	const CharStringMap& variantMap() const { return variant; }
636	const CharStringMap& subdivisionMap() const { return subdivision; }
637
638	static void U_CALLCONV loadData(UErrorCode &status);
639	static UBool U_CALLCONV cleanup();
640
641	static UInitOnce gInitOnce;
642
643	private:
644	AliasData(CharStringMap languageMap,
645	CharStringMap scriptMap,
646	CharStringMap territoryMap,
647	CharStringMap variantMap,
648	CharStringMap subdivisionMap,
649	CharString* strings)
650	: language(std::move(languageMap)),
651	script(std::move(scriptMap)),
652	territory(std::move(territoryMap)),
653	variant(std::move(variantMap)),
654	subdivision(std::move(subdivisionMap)),
655	strings(strings) {
656	}
657
658	~AliasData() {
659	delete strings;
660	}
661
662	static const AliasData* gSingleton;
663
664	CharStringMap language;
665	CharStringMap script;
666	CharStringMap territory;
667	CharStringMap variant;
668	CharStringMap subdivision;
669	CharString* strings;
670
671	friend class AliasDataBuilder;
672	};
673
674
675	const AliasData* AliasData::gSingleton = nullptr;
676	UInitOnce AliasData::gInitOnce {};
677
678	UBool U_CALLCONV
679	AliasData::cleanup()
680	{
681	gInitOnce.reset();
682	delete gSingleton;
683	return true;
684	}
685
686	void
687	AliasDataBuilder::readAlias(
688	UResourceBundle* alias,
689	UniqueCharStrings* strings,
690	LocalMemory<const char*>& types,
691	LocalMemory<int32_t>& replacementIndexes,
692	int32_t &length,
693	void (checkType)(const char type),
694	void (checkReplacement)(const UChar replacement),
695	UErrorCode &status) {
696	if (U_FAILURE(status)) {
697	return;
698	}
699	length = ures_getSizeures_getSize_77(alias);
700	const char** rawTypes = types.allocateInsteadAndCopy(length);
701	if (rawTypes == nullptr) {
702	status = U_MEMORY_ALLOCATION_ERROR;
703	return;
704	}
705	int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length);
706	if (rawIndexes == nullptr) {
707	status = U_MEMORY_ALLOCATION_ERROR;
708	return;
709	}
710	for (int i = 0; U_SUCCESS(status) && ures_hasNextures_hasNext_77(alias); i++) {
711	LocalUResourceBundlePointer res(
712	ures_getNextResourceures_getNextResource_77(alias, nullptr, &status));
713	const char* aliasFrom = ures_getKeyures_getKey_77(res.getAlias());
714	const UChar* aliasTo =
715	ures_getStringByKeyures_getStringByKey_77(res.getAlias(), "replacement", nullptr, &status);
716	if (U_FAILURE(status)) return;
717
718	checkType(aliasFrom);
719	checkReplacement(aliasTo);
720
721	rawTypes[i] = aliasFrom;
722	rawIndexes[i] = strings->add(aliasTo, status);
723	}
724	}
725
726	/**
727	* Read the languageAlias data from alias to strings+types+replacementIndexes.
728	* Allocate length items for types, to store the type field. Allocate length
729	* items for replacementIndexes, to store the index in the strings for the
730	* replacement language.
731	*/
732	void
733	AliasDataBuilder::readLanguageAlias(
734	UResourceBundle* alias,
735	UniqueCharStrings* strings,
736	LocalMemory<const char*>& types,
737	LocalMemory<int32_t>& replacementIndexes,
738	int32_t &length,
739	UErrorCode &status)
740	{
741	return readAlias(
742	alias, strings, types, replacementIndexes, length,
743	#if U_DEBUG1
744	[](const char* type) {
745	// Assert the aliasFrom only contains the following possibilities
746	// language_REGION_variant
747	// language_REGION
748	// language_variant
749	// language
750	// und_variant
751	Locale test(type);
752	// Assert no script in aliasFrom
753	U_ASSERT(test.getScript()[0] == '\0')(static_cast <bool> (test.getScript()[0] == '\0') ? void (0) : __assert_fail ("test.getScript()[0] == '\\0'", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
754	// Assert when language is und, no REGION in aliasFrom.
755	U_ASSERT(test.getLanguage()[0] != '\0' \|\| test.getCountry()[0] == '\0')(static_cast <bool> (test.getLanguage()[0] != '\0' \|\| test .getCountry()[0] == '\0') ? void (0) : __assert_fail ("test.getLanguage()[0] != '\\0' \|\| test.getCountry()[0] == '\\0'" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
756	},
757	#else
758	[](const char*) {},
759	#endif
760	[](const UChar*) {}, status);
761	}
762
763	/**
764	* Read the scriptAlias data from alias to strings+types+replacementIndexes.
765	* Allocate length items for types, to store the type field. Allocate length
766	* items for replacementIndexes, to store the index in the strings for the
767	* replacement script.
768	*/
769	void
770	AliasDataBuilder::readScriptAlias(
771	UResourceBundle* alias,
772	UniqueCharStrings* strings,
773	LocalMemory<const char*>& types,
774	LocalMemory<int32_t>& replacementIndexes,
775	int32_t &length,
776	UErrorCode &status)
777	{
778	return readAlias(
779	alias, strings, types, replacementIndexes, length,
780	#if U_DEBUG1
781	[](const char* type) {
782	U_ASSERT(uprv_strlen(type) == 4)(static_cast <bool> (:: strlen(type) == 4) ? void (0) : __assert_fail (":: strlen(type) == 4", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
783	},
784	[](const UChar* replacement) {
785	U_ASSERT(u_strlen(replacement) == 4)(static_cast <bool> (u_strlen_77(replacement) == 4) ? void (0) : __assert_fail ("u_strlen_77(replacement) == 4", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
786	},
787	#else
788	[](const char*) {},
789	[](const UChar*) { },
790	#endif
791	status);
792	}
793
794	/**
795	* Read the territoryAlias data from alias to strings+types+replacementIndexes.
796	* Allocate length items for types, to store the type field. Allocate length
797	* items for replacementIndexes, to store the index in the strings for the
798	* replacement regions.
799	*/
800	void
801	AliasDataBuilder::readTerritoryAlias(
802	UResourceBundle* alias,
803	UniqueCharStrings* strings,
804	LocalMemory<const char*>& types,
805	LocalMemory<int32_t>& replacementIndexes,
806	int32_t &length,
807	UErrorCode &status)
808	{
809	return readAlias(
810	alias, strings, types, replacementIndexes, length,
811	#if U_DEBUG1
812	[](const char* type) {
813	U_ASSERT(uprv_strlen(type) == 2 \|\| uprv_strlen(type) == 3)(static_cast <bool> (:: strlen(type) == 2 \|\| :: strlen( type) == 3) ? void (0) : __assert_fail (":: strlen(type) == 2 \|\| :: strlen(type) == 3" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
814	},
815	#else
816	[](const char*) {},
817	#endif
818	[](const UChar*) { },
819	status);
820	}
821
822	/**
823	* Read the variantAlias data from alias to strings+types+replacementIndexes.
824	* Allocate length items for types, to store the type field. Allocate length
825	* items for replacementIndexes, to store the index in the strings for the
826	* replacement variant.
827	*/
828	void
829	AliasDataBuilder::readVariantAlias(
830	UResourceBundle* alias,
831	UniqueCharStrings* strings,
832	LocalMemory<const char*>& types,
833	LocalMemory<int32_t>& replacementIndexes,
834	int32_t &length,
835	UErrorCode &status)
836	{
837	return readAlias(
838	alias, strings, types, replacementIndexes, length,
839	#if U_DEBUG1
840	[](const char* type) {
841	U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8)(static_cast <bool> (:: strlen(type) >= 4 && :: strlen(type) <= 8) ? void (0) : __assert_fail (":: strlen(type) >= 4 && :: strlen(type) <= 8" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
842	U_ASSERT(uprv_strlen(type) != 4 \|\|(static_cast <bool> (:: strlen(type) != 4 \|\| (type[0] >= '0' && type[0] <= '9')) ? void (0) : __assert_fail (":: strlen(type) != 4 \|\| (type[0] >= '0' && type[0] <= '9')" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ))
843	(type[0] >= '0' && type[0] <= '9'))(static_cast <bool> (:: strlen(type) != 4 \|\| (type[0] >= '0' && type[0] <= '9')) ? void (0) : __assert_fail (":: strlen(type) != 4 \|\| (type[0] >= '0' && type[0] <= '9')" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
844	},
845	[](const UChar* replacement) {
846	int32_t len = u_strlenu_strlen_77(replacement);
847	U_ASSERT(len >= 4 && len <= 8)(static_cast <bool> (len >= 4 && len <= 8 ) ? void (0) : __assert_fail ("len >= 4 && len <= 8" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
848	U_ASSERT(len != 4 \|\|(static_cast <bool> (len != 4 \|\| (replacement >= u'0' && replacement <= u'9')) ? void (0) : __assert_fail ("len != 4 \|\| (replacement >= u'0' && replacement <= u'9')" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ))
849	(replacement >= u'0' &&(static_cast <bool> (len != 4 \|\| (replacement >= u'0' && replacement <= u'9')) ? void (0) : __assert_fail ("len != 4 \|\| (replacement >= u'0' && *replacement <= u'9')" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ))
850	replacement <= u'9'))(static_cast <bool> (len != 4 \|\| (replacement >= u'0' && replacement <= u'9')) ? void (0) : __assert_fail ("len != 4 \|\| (replacement >= u'0' && *replacement <= u'9')" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
851	},
852	#else
853	[](const char*) {},
854	[](const UChar*) { },
855	#endif
856	status);
857	}
858
859	/**
860	* Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
861	* Allocate length items for types, to store the type field. Allocate length
862	* items for replacementIndexes, to store the index in the strings for the
863	* replacement regions.
864	*/
865	void
866	AliasDataBuilder::readSubdivisionAlias(
867	UResourceBundle* alias,
868	UniqueCharStrings* strings,
869	LocalMemory<const char*>& types,
870	LocalMemory<int32_t>& replacementIndexes,
871	int32_t &length,
872	UErrorCode &status)
873	{
874	return readAlias(
875	alias, strings, types, replacementIndexes, length,
876	#if U_DEBUG1
877	[](const char* type) {
878	U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8)(static_cast <bool> (:: strlen(type) >= 3 && :: strlen(type) <= 8) ? void (0) : __assert_fail (":: strlen(type) >= 3 && :: strlen(type) <= 8" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
879	},
880	#else
881	[](const char*) {},
882	#endif
883	[](const UChar*) { },
884	status);
885	}
886
887	/**
888	* Initializes the alias data from the ICU resource bundles. The alias data
889	* contains alias of language, country, script and variants.
890	*
891	* If the alias data has already loaded, then this method simply returns without
892	* doing anything meaningful.
893	*/
894	void U_CALLCONV
895	AliasData::loadData(UErrorCode &status)
896	{
897	#ifdef LOCALE_CANONICALIZATION_DEBUG
898	UDate start = uprv_getRawUTCtimeuprv_getRawUTCtime_77();
899	#endif // LOCALE_CANONICALIZATION_DEBUG
900	ucln_common_registerCleanupucln_common_registerCleanup_77(UCLN_COMMON_LOCALE_ALIAS, cleanup);
901	AliasDataBuilder builder;
902	gSingleton = builder.build(status);
903	#ifdef LOCALE_CANONICALIZATION_DEBUG
904	UDate end = uprv_getRawUTCtimeuprv_getRawUTCtime_77();
905	printf("AliasData::loadData took total %f ms\n", end - start);
906	#endif // LOCALE_CANONICALIZATION_DEBUG
907	}
908
909	/**
910	* Build the alias data from resources.
911	*/
912	AliasData*
913	AliasDataBuilder::build(UErrorCode &status) {
914	if (U_FAILURE(status)) { return nullptr; }
915
916	LocalUResourceBundlePointer metadata(
917	ures_openDirectures_openDirect_77(nullptr, "metadata", &status));
918	LocalUResourceBundlePointer metadataAlias(
919	ures_getByKeyures_getByKey_77(metadata.getAlias(), "alias", nullptr, &status));
920	LocalUResourceBundlePointer languageAlias(
921	ures_getByKeyures_getByKey_77(metadataAlias.getAlias(), "language", nullptr, &status));
922	LocalUResourceBundlePointer scriptAlias(
923	ures_getByKeyures_getByKey_77(metadataAlias.getAlias(), "script", nullptr, &status));
924	LocalUResourceBundlePointer territoryAlias(
925	ures_getByKeyures_getByKey_77(metadataAlias.getAlias(), "territory", nullptr, &status));
926	LocalUResourceBundlePointer variantAlias(
927	ures_getByKeyures_getByKey_77(metadataAlias.getAlias(), "variant", nullptr, &status));
928	LocalUResourceBundlePointer subdivisionAlias(
929	ures_getByKeyures_getByKey_77(metadataAlias.getAlias(), "subdivision", nullptr, &status));
930
931	if (U_FAILURE(status)) {
932	return nullptr;
933	}
934	int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
935	variantLength = 0, subdivisionLength = 0;
936
937	// Read the languageAlias into languageTypes, languageReplacementIndexes
938	// and strings
939	UniqueCharStrings strings(status);
940	LocalMemory<const char*> languageTypes;
941	LocalMemory<int32_t> languageReplacementIndexes;
942	readLanguageAlias(languageAlias.getAlias(),
943	&strings,
944	languageTypes,
945	languageReplacementIndexes,
946	languagesLength,
947	status);
948
949	// Read the scriptAlias into scriptTypes, scriptReplacementIndexes
950	// and strings
951	LocalMemory<const char*> scriptTypes;
952	LocalMemory<int32_t> scriptReplacementIndexes;
953	readScriptAlias(scriptAlias.getAlias(),
954	&strings,
955	scriptTypes,
956	scriptReplacementIndexes,
957	scriptLength,
958	status);
959
960	// Read the territoryAlias into territoryTypes, territoryReplacementIndexes
961	// and strings
962	LocalMemory<const char*> territoryTypes;
963	LocalMemory<int32_t> territoryReplacementIndexes;
964	readTerritoryAlias(territoryAlias.getAlias(),
965	&strings,
966	territoryTypes,
967	territoryReplacementIndexes,
968	territoryLength, status);
969
970	// Read the variantAlias into variantTypes, variantReplacementIndexes
971	// and strings
972	LocalMemory<const char*> variantTypes;
973	LocalMemory<int32_t> variantReplacementIndexes;
974	readVariantAlias(variantAlias.getAlias(),
975	&strings,
976	variantTypes,
977	variantReplacementIndexes,
978	variantLength, status);
979
980	// Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
981	// and strings
982	LocalMemory<const char*> subdivisionTypes;
983	LocalMemory<int32_t> subdivisionReplacementIndexes;
984	readSubdivisionAlias(subdivisionAlias.getAlias(),
985	&strings,
986	subdivisionTypes,
987	subdivisionReplacementIndexes,
988	subdivisionLength, status);
989
990	if (U_FAILURE(status)) {
991	return nullptr;
992	}
993
994	// We can only use strings after freeze it.
995	strings.freeze();
996
997	// Build the languageMap from languageTypes & languageReplacementIndexes
998	CharStringMap languageMap(490, status);
999	for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) {
1000	languageMap.put(languageTypes[i],
1001	strings.get(languageReplacementIndexes[i]),
1002	status);
1003	}
1004
1005	// Build the scriptMap from scriptTypes & scriptReplacementIndexes
1006	CharStringMap scriptMap(1, status);
1007	for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) {
1008	scriptMap.put(scriptTypes[i],
1009	strings.get(scriptReplacementIndexes[i]),
1010	status);
1011	}
1012
1013	// Build the territoryMap from territoryTypes & territoryReplacementIndexes
1014	CharStringMap territoryMap(650, status);
1015	for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) {
1016	territoryMap.put(territoryTypes[i],
1017	strings.get(territoryReplacementIndexes[i]),
1018	status);
1019	}
1020
1021	// Build the variantMap from variantTypes & variantReplacementIndexes.
1022	CharStringMap variantMap(2, status);
1023	for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) {
1024	variantMap.put(variantTypes[i],
1025	strings.get(variantReplacementIndexes[i]),
1026	status);
1027	}
1028
1029	// Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
1030	CharStringMap subdivisionMap(2, status);
1031	for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
1032	subdivisionMap.put(subdivisionTypes[i],
1033	strings.get(subdivisionReplacementIndexes[i]),
1034	status);
1035	}
1036
1037	if (U_FAILURE(status)) {
1038	return nullptr;
1039	}
1040
1041	// copy hashtables
1042	auto *data = new AliasData(
1043	std::move(languageMap),
1044	std::move(scriptMap),
1045	std::move(territoryMap),
1046	std::move(variantMap),
1047	std::move(subdivisionMap),
1048	strings.orphanCharStrings());
1049
1050	if (data == nullptr) {
1051	status = U_MEMORY_ALLOCATION_ERROR;
1052	}
1053	return data;
1054	}
1055
1056	/**
1057	* A class that find the replacement values of locale fields by using AliasData.
1058	*/
1059	class AliasReplacer {
1060	public:
1061	AliasReplacer(UErrorCode& status) :
1062	language(nullptr), script(nullptr), region(nullptr),
1063	extensions(nullptr),
1064	// store value in variants only once
1065	variants(nullptr,
1066	([](UElement e1, UElement e2) -> UBool {
1067	return 0==uprv_strcmp((const char)e1.pointer,:: strcmp((const char)e1.pointer, (const char*)e2.pointer)
1068	(const char)e2.pointer):: strcmp((const char)e1.pointer, (const char*)e2.pointer);}),
1069	status),
1070	data(nullptr) {
1071	}
1072	~AliasReplacer() {
1073	}
1074
1075	// Check the fields inside locale, if need to replace fields,
1076	// place the the replaced locale ID in out and return true.
1077	// Otherwise return false for no replacement or error.
1078	bool replace(
1079	const Locale& locale, CharString& out, UErrorCode& status);
1080
1081	private:
1082	const char* language;
1083	const char* script;
1084	const char* region;
1085	const char* extensions;
1086	UVector variants;
1087
1088	const AliasData* data;
1089
1090	inline bool notEmpty(const char* str) {
1091	return str && str[0] != NULL_CHAR'\0';
1092	}
1093
1094	/**
1095	* If replacement is neither null nor empty and input is either null or empty,
1096	* return replacement.
1097	* If replacement is neither null nor empty but input is not empty, return input.
1098	* If replacement is either null or empty and type is either null or empty,
1099	* return input.
1100	* Otherwise return null.
1101	* replacement input type return
1102	* AAA nullptr * AAA
1103	* AAA BBB * BBB
1104	* nullptr \|\| "" CCC nullptr CCC
1105	* nullptr \|\| "" * DDD nullptr
1106	*/
1107	inline const char* deleteOrReplace(
1108	const char* input, const char* type, const char* replacement) {
1109	return notEmpty(replacement) ?
1110	((input == nullptr) ? replacement : input) :
1111	((type == nullptr) ? input : nullptr);
1112	}
1113
1114	inline bool same(const char* a, const char* b) {
1115	if (a == nullptr && b == nullptr) {
1116	return true;
1117	}
1118	if ((a == nullptr && b != nullptr) \|\|
1119	(a != nullptr && b == nullptr)) {
1120	return false;
1121	}
1122	return uprv_strcmp(a, b):: strcmp(a, b) == 0;
1123	}
1124
1125	// Gather fields and generate locale ID into out.
1126	CharString& outputToString(CharString& out, UErrorCode& status);
1127
1128	// Generate the lookup key.
1129	CharString& generateKey(const char* language, const char* region,
1130	const char* variant, CharString& out,
1131	UErrorCode& status);
1132
1133	void parseLanguageReplacement(const char* replacement,
1134	const char*& replaceLanguage,
1135	const char*& replaceScript,
1136	const char*& replaceRegion,
1137	const char*& replaceVariant,
1138	const char*& replaceExtensions,
1139	UVector& toBeFreed,
1140	UErrorCode& status);
1141
1142	// Replace by using languageAlias.
1143	bool replaceLanguage(bool checkLanguage, bool checkRegion,
1144	bool checkVariants, UVector& toBeFreed,
1145	UErrorCode& status);
1146
1147	// Replace by using territoryAlias.
1148	bool replaceTerritory(UVector& toBeFreed, UErrorCode& status);
1149
1150	// Replace by using scriptAlias.
1151	bool replaceScript(UErrorCode& status);
1152
1153	// Replace by using variantAlias.
1154	bool replaceVariant(UErrorCode& status);
1155
1156	// Replace by using subdivisionAlias.
1157	bool replaceSubdivision(StringPiece subdivision,
1158	CharString& output, UErrorCode& status);
1159
1160	// Replace transformed extensions.
1161	bool replaceTransformedExtensions(
1162	CharString& transformedExtensions, CharString& output, UErrorCode& status);
1163	};
1164
1165	CharString&
1166	AliasReplacer::generateKey(
1167	const char* language, const char* region, const char* variant,
1168	CharString& out, UErrorCode& status)
1169	{
1170	if (U_FAILURE(status)) { return out; }
1171	out.append(language, status);
1172	if (notEmpty(region)) {
1173	out.append(SEP_CHAR'_', status)
1174	.append(region, status);
1175	}
1176	if (notEmpty(variant)) {
1177	out.append(SEP_CHAR'_', status)
1178	.append(variant, status);
1179	}
1180	return out;
1181	}
1182
1183	void
1184	AliasReplacer::parseLanguageReplacement(
1185	const char* replacement,
1186	const char*& replacedLanguage,
1187	const char*& replacedScript,
1188	const char*& replacedRegion,
1189	const char*& replacedVariant,
1190	const char*& replacedExtensions,
1191	UVector& toBeFreed,
1192	UErrorCode& status)
1193	{
1194	if (U_FAILURE(status)) {
1195	return;
1196	}
1197	replacedScript = replacedRegion = replacedVariant
1198	= replacedExtensions = nullptr;
1199	if (uprv_strchr(replacement, '_'):: strchr(replacement, '_') == nullptr) {
1200	replacedLanguage = replacement;
1201	// reach the end, just return it.
1202	return;
1203	}
1204	// We have multiple field so we have to allocate and parse
1205	CharString* str =
1206	new CharString(replacement, static_cast<int32_t>(uprv_strlen(replacement):: strlen(replacement)), status);
1207	LocalPointer<CharString> lpStr(str, status);
1208	toBeFreed.adoptElement(lpStr.orphan(), status);
1209	if (U_FAILURE(status)) {
1210	return;
1211	}
1212	char* data = str->data();
1213	replacedLanguage = (const char*) data;
1214	char* endOfField = uprv_strchr(data, '_'):: strchr(data, '_');
1215	*endOfField = '\0'; // null terminiate it.
1216	endOfField++;
1217	const char* start = endOfField;
1218	endOfField = const_cast<char*>(uprv_strchr(start, '_'):: strchr(start, '_'));
1219	size_t len = 0;
1220	if (endOfField == nullptr) {
1221	len = uprv_strlen(start):: strlen(start);
1222	} else {
1223	len = endOfField - start;
1224	*endOfField = '\0'; // null terminiate it.
1225	}
1226	if (len == 4 && uprv_isASCIILetteruprv_isASCIILetter_77(*start)) {
1227	// Got a script
1228	replacedScript = start;
1229	if (endOfField == nullptr) {
1230	return;
1231	}
1232	start = endOfField++;
1233	endOfField = const_cast<char*>(uprv_strchr(start, '_'):: strchr(start, '_'));
1234	if (endOfField == nullptr) {
1235	len = uprv_strlen(start):: strlen(start);
1236	} else {
1237	len = endOfField - start;
1238	*endOfField = '\0'; // null terminiate it.
1239	}
1240	}
1241	if (len >= 2 && len <= 3) {
1242	// Got a region
1243	replacedRegion = start;
1244	if (endOfField == nullptr) {
1245	return;
1246	}
1247	start = endOfField++;
1248	endOfField = const_cast<char*>(uprv_strchr(start, '_'):: strchr(start, '_'));
1249	if (endOfField == nullptr) {
1250	len = uprv_strlen(start):: strlen(start);
1251	} else {
1252	len = endOfField - start;
1253	*endOfField = '\0'; // null terminiate it.
1254	}
1255	}
1256	if (len >= 4) {
1257	// Got a variant
1258	replacedVariant = start;
1259	if (endOfField == nullptr) {
1260	return;
1261	}
1262	start = endOfField++;
1263	}
1264	replacedExtensions = start;
1265	}
1266
1267	bool
1268	AliasReplacer::replaceLanguage(
1269	bool checkLanguage, bool checkRegion,
1270	bool checkVariants, UVector& toBeFreed, UErrorCode& status)
1271	{
1272	if (U_FAILURE(status)) {
1273	return false;
1274	}
1275	if ( (checkRegion && region == nullptr) \|\|
1276	(checkVariants && variants.size() == 0)) {
1277	// Nothing to search.
1278	return false;
1279	}
1280	int32_t variant_size = checkVariants ? variants.size() : 1;
1281	// Since we may have more than one variant, we need to loop through them.
1282	const char* searchLanguage = checkLanguage ? language : "und";
1283	const char* searchRegion = checkRegion ? region : nullptr;
1284	const char* searchVariant = nullptr;
1285	for (int32_t variant_index = 0;
1286	variant_index < variant_size;
1287	variant_index++) {
1288	if (checkVariants) {
1289	U_ASSERT(variant_index < variant_size)(static_cast <bool> (variant_index < variant_size) ? void (0) : __assert_fail ("variant_index < variant_size", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
1290	searchVariant = static_cast<const char*>(variants.elementAt(variant_index));
1291	}
1292
1293	if (searchVariant != nullptr && uprv_strlen(searchVariant):: strlen(searchVariant) < 4) {
1294	// Do not consider ill-formed variant subtag.
1295	searchVariant = nullptr;
1296	}
1297	CharString typeKey;
1298	generateKey(searchLanguage, searchRegion, searchVariant, typeKey,
1299	status);
1300	if (U_FAILURE(status)) {
1301	return false;
1302	}
1303	const char *replacement = data->languageMap().get(typeKey.data());
1304	if (replacement == nullptr) {
1305	// Found no replacement data.
1306	continue;
1307	}
1308
1309	const char* replacedLanguage = nullptr;
1310	const char* replacedScript = nullptr;
1311	const char* replacedRegion = nullptr;
1312	const char* replacedVariant = nullptr;
1313	const char* replacedExtensions = nullptr;
1314	parseLanguageReplacement(replacement,
1315	replacedLanguage,
1316	replacedScript,
1317	replacedRegion,
1318	replacedVariant,
1319	replacedExtensions,
1320	toBeFreed,
1321	status);
1322	replacedLanguage =
1323	(replacedLanguage != nullptr && uprv_strcmp(replacedLanguage, "und"):: strcmp(replacedLanguage, "und") == 0) ?
1324	language : replacedLanguage;
1325	replacedScript = deleteOrReplace(script, nullptr, replacedScript);
1326	replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
1327	replacedVariant = deleteOrReplace(
1328	searchVariant, searchVariant, replacedVariant);
1329
1330	if ( same(language, replacedLanguage) &&
1331	same(script, replacedScript) &&
1332	same(region, replacedRegion) &&
1333	same(searchVariant, replacedVariant) &&
1334	replacedExtensions == nullptr) {
1335	// Replacement produce no changes.
1336	continue;
1337	}
1338
1339	language = replacedLanguage;
1340	region = replacedRegion;
1341	script = replacedScript;
1342	if (searchVariant != nullptr) {
1343	if (notEmpty(replacedVariant)) {
1344	variants.setElementAt((void*)replacedVariant, variant_index);
1345	} else {
1346	variants.removeElementAt(variant_index);
1347	}
1348	}
1349	if (replacedExtensions != nullptr) {
1350	// DO NOTHING
1351	// UTS35 does not specify what should we do if we have extensions in the
1352	// replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
1353	// extensions in them languageAlias:
1354	// i_default => en_x_i_default
1355	// i_enochian => und_x_i_enochian
1356	// i_mingo => see_x_i_mingo
1357	// zh_min => nan_x_zh_min
1358	// But all of them are already changed by code inside ultag_parse() before
1359	// hitting this code.
1360	}
1361
1362	// Something changed by language alias data.
1363	return true;
1364	}
1365	// Nothing changed by language alias data.
1366	return false;
1367	}
1368
1369	bool
1370	AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
1371	{
1372	if (U_FAILURE(status)) {
1373	return false;
1374	}
1375	if (region == nullptr) {
1376	// No region to search.
1377	return false;
1378	}
1379	const char *replacement = data->territoryMap().get(region);
1380	if (replacement == nullptr) {
1381	// Found no replacement data for this region.
1382	return false;
1383	}
1384	const char* replacedRegion = replacement;
1385	const char* firstSpace = uprv_strchr(replacement, ' '):: strchr(replacement, ' ');
1386	if (firstSpace != nullptr) {
1387	// If there are are more than one region in the replacement.
1388	// We need to check which one match based on the language.
1389	// Cannot use nullptr for language because that will construct
1390	// the default locale, in that case, use "und" to get the correct
1391	// locale.
1392	Locale l = LocaleBuilder()
1393	.setLanguage(language == nullptr ? "und" : language)
1394	.setScript(script)
1395	.build(status);
1396	l.addLikelySubtags(status);
1397	const char* likelyRegion = l.getCountry();
1398	LocalPointer<CharString> item;
1399	if (likelyRegion != nullptr && uprv_strlen(likelyRegion):: strlen(likelyRegion) > 0) {
1400	size_t len = uprv_strlen(likelyRegion):: strlen(likelyRegion);
1401	const char* foundInReplacement = uprv_strstr(replacement,:: strstr(replacement, likelyRegion)
1402	likelyRegion):: strstr(replacement, likelyRegion);
1403	if (foundInReplacement != nullptr) {
1404	// Assuming the case there are no three letter region code in
1405	// the replacement of territoryAlias
1406	U_ASSERT(foundInReplacement == replacement \|\|(static_cast <bool> (foundInReplacement == replacement \|\| (foundInReplacement-1) == ' ') ? void (0) : __assert_fail ( "foundInReplacement == replacement \|\| (foundInReplacement-1) == ' '" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ))
1407	(foundInReplacement-1) == ' ')(static_cast <bool> (foundInReplacement == replacement \|\| (foundInReplacement-1) == ' ') ? void (0) : __assert_fail ( "foundInReplacement == replacement \|\| *(foundInReplacement-1) == ' '" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
1408	U_ASSERT(foundInReplacement[len] == ' ' \|\|(static_cast <bool> (foundInReplacement[len] == ' ' \|\| foundInReplacement [len] == '\0') ? void (0) : __assert_fail ("foundInReplacement[len] == ' ' \|\| foundInReplacement[len] == '\\0'" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ))
1409	foundInReplacement[len] == '\0')(static_cast <bool> (foundInReplacement[len] == ' ' \|\| foundInReplacement [len] == '\0') ? void (0) : __assert_fail ("foundInReplacement[len] == ' ' \|\| foundInReplacement[len] == '\\0'" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
1410	item.adoptInsteadAndCheckErrorCode(
1411	new CharString(foundInReplacement, static_cast<int32_t>(len), status), status);
1412	}
1413	}
1414	if (item.isNull() && U_SUCCESS(status)) {
1415	item.adoptInsteadAndCheckErrorCode(
1416	new CharString(replacement,
1417	static_cast<int32_t>(firstSpace - replacement), status), status);
1418	}
1419	if (U_FAILURE(status)) { return false; }
1420	replacedRegion = item->data();
1421	toBeFreed.adoptElement(item.orphan(), status);
1422	if (U_FAILURE(status)) { return false; }
1423	}
1424	U_ASSERT(!same(region, replacedRegion))(static_cast <bool> (!same(region, replacedRegion)) ? void (0) : __assert_fail ("!same(region, replacedRegion)", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
1425	region = replacedRegion;
1426	// The region is changed by data in territory alias.
1427	return true;
1428	}
1429
1430	bool
1431	AliasReplacer::replaceScript(UErrorCode& status)
1432	{
1433	if (U_FAILURE(status)) {
1434	return false;
1435	}
1436	if (script == nullptr) {
1437	// No script to search.
1438	return false;
1439	}
1440	const char *replacement = data->scriptMap().get(script);
1441	if (replacement == nullptr) {
1442	// Found no replacement data for this script.
1443	return false;
1444	}
1445	U_ASSERT(!same(script, replacement))(static_cast <bool> (!same(script, replacement)) ? void (0) : __assert_fail ("!same(script, replacement)", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
1446	script = replacement;
1447	// The script is changed by data in script alias.
1448	return true;
1449	}
1450
1451	bool
1452	AliasReplacer::replaceVariant(UErrorCode& status)
1453	{
1454	if (U_FAILURE(status)) {
1455	return false;
1456	}
1457	// Since we may have more than one variant, we need to loop through them.
1458	for (int32_t i = 0; i < variants.size(); i++) {
1459	const char* variant = static_cast<const char*>(variants.elementAt(i));
1460	const char *replacement = data->variantMap().get(variant);
1461	if (replacement == nullptr) {
1462	// Found no replacement data for this variant.
1463	continue;
1464	}
1465	U_ASSERT((uprv_strlen(replacement) >= 5 &&(static_cast <bool> ((:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) \|\| (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement [0] <= '9')) ? void (0) : __assert_fail ("(:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) \|\| (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement[0] <= '9')" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ))
1466	uprv_strlen(replacement) <= 8) \|\|(static_cast <bool> ((:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) \|\| (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement [0] <= '9')) ? void (0) : __assert_fail ("(:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) \|\| (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement[0] <= '9')" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ))
1467	(uprv_strlen(replacement) == 4 &&(static_cast <bool> ((:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) \|\| (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement [0] <= '9')) ? void (0) : __assert_fail ("(:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) \|\| (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement[0] <= '9')" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ))
1468	replacement[0] >= '0' &&(static_cast <bool> ((:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) \|\| (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement [0] <= '9')) ? void (0) : __assert_fail ("(:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) \|\| (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement[0] <= '9')" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ))
1469	replacement[0] <= '9'))(static_cast <bool> ((:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) \|\| (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement [0] <= '9')) ? void (0) : __assert_fail ("(:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) \|\| (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement[0] <= '9')" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
1470	if (!same(variant, replacement)) {
1471	variants.setElementAt((void*)replacement, i);
1472	// Special hack to handle hepburn-heploc => alalc97
1473	if (uprv_strcmp(variant, "heploc"):: strcmp(variant, "heploc") == 0) {
1474	for (int32_t j = 0; j < variants.size(); j++) {
1475	if (uprv_strcmp((const char)(variants.elementAt(j)),:: strcmp((const char)(variants.elementAt(j)), "hepburn")
1476	"hepburn"):: strcmp((const char*)(variants.elementAt(j)), "hepburn") == 0) {
1477	variants.removeElementAt(j);
1478	}
1479	}
1480	}
1481	return true;
1482	}
1483	}
1484	return false;
1485	}
1486
1487	bool
1488	AliasReplacer::replaceSubdivision(
1489	StringPiece subdivision, CharString& output, UErrorCode& status)
1490	{
1491	if (U_FAILURE(status)) {
1492	return false;
1493	}
1494	const char *replacement = data->subdivisionMap().get(subdivision.data());
1495	if (replacement != nullptr) {
1496	const char* firstSpace = uprv_strchr(replacement, ' '):: strchr(replacement, ' ');
1497	// Found replacement data for this subdivision.
1498	size_t len = (firstSpace != nullptr) ?
1499	(firstSpace - replacement) : uprv_strlen(replacement):: strlen(replacement);
1500	if (2 <= len && len <= 8) {
1501	output.append(replacement, static_cast<int32_t>(len), status);
1502	if (2 == len) {
1503	// Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
1504	output.append("zzzz", 4, status);
1505	}
1506	}
1507	return true;
1508	}
1509	return false;
1510	}
1511
1512	bool
1513	AliasReplacer::replaceTransformedExtensions(
1514	CharString& transformedExtensions, CharString& output, UErrorCode& status)
1515	{
1516	// The content of the transformedExtensions will be modified in this
1517	// function to NUL-terminating (tkey-tvalue) pairs.
1518	if (U_FAILURE(status)) {
1519	return false;
1520	}
1521	int32_t len = transformedExtensions.length();
1522	const char* str = transformedExtensions.data();
1523	const char* tkey = ultag_getTKeyStartultag_getTKeyStart_77(str);
1524	int32_t tlangLen = (tkey == str) ? 0 :
1525	((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
1526	if (tlangLen > 0) {
1527	Locale tlang = LocaleBuilder()
1528	.setLanguageTag(StringPiece(str, tlangLen))
1529	.build(status);
1530	tlang.canonicalize(status);
1531	output = tlang.toLanguageTag<CharString>(status);
1532	if (U_FAILURE(status)) {
1533	return false;
1534	}
1535	T_CString_toLowerCaseT_CString_toLowerCase_77(output.data());
1536	}
1537	if (tkey != nullptr) {
1538	// We need to sort the tfields by tkey
1539	UVector tfields(status);
1540	if (U_FAILURE(status)) {
1541	return false;
1542	}
1543	do {
1544	const char* tvalue = uprv_strchr(tkey, '-'):: strchr(tkey, '-');
1545	if (tvalue == nullptr) {
1546	status = U_ILLEGAL_ARGUMENT_ERROR;
1547	return false;
1548	}
1549	const char* nextTKey = ultag_getTKeyStartultag_getTKeyStart_77(tvalue);
1550	if (nextTKey != nullptr) {
1551	const_cast<char>(nextTKey - 1) = '\0'; // NUL terminate tvalue
1552	}
1553	tfields.insertElementAt((void*)tkey, tfields.size(), status);
1554	if (U_FAILURE(status)) {
1555	return false;
1556	}
1557	tkey = nextTKey;
1558	} while (tkey != nullptr);
1559	tfields.sort([](UElement e1, UElement e2) -> int32_t {
1560	return uprv_strcmp((const char)e1.pointer, (const char)e2.pointer):: strcmp((const char)e1.pointer, (const char)e2.pointer);
1561	}, status);
1562	for (int32_t i = 0; i < tfields.size(); i++) {
1563	if (output.length() > 0) {
1564	output.append('-', status);
1565	}
1566	const char* tfield = static_cast<const char*>(tfields.elementAt(i));
1567	const char* tvalue = uprv_strchr(tfield, '-'):: strchr(tfield, '-');
1568	if (tvalue == nullptr) {
1569	status = U_ILLEGAL_ARGUMENT_ERROR;
1570	return false;
1571	}
1572	// Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
1573	const_cast<char>(tvalue++) = '\0'; // NUL terminate tkey
1574	output.append(tfield, status).append('-', status);
1575	std::optional<std::string_view> bcpTValue = ulocimp_toBcpTypeulocimp_toBcpType_77(tfield, tvalue);
1576	output.append(bcpTValue.has_value() ? *bcpTValue : tvalue, status);
1577	}
1578	}
1579	if (U_FAILURE(status)) {
1580	return false;
1581	}
1582	return true;
1583	}
1584
1585	CharString&
1586	AliasReplacer::outputToString(
1587	CharString& out, UErrorCode& status)
1588	{
1589	if (U_FAILURE(status)) { return out; }
1590	out.append(language, status);
1591	if (notEmpty(script)) {
1592	out.append(SEP_CHAR'_', status)
1593	.append(script, status);
1594	}
1595	if (notEmpty(region)) {
1596	out.append(SEP_CHAR'_', status)
1597	.append(region, status);
1598	}
1599	if (variants.size() > 0) {
1600	if (!notEmpty(script) && !notEmpty(region)) {
1601	out.append(SEP_CHAR'_', status);
1602	}
1603	variants.sort([](UElement e1, UElement e2) -> int32_t {
1604	return uprv_strcmp((const char)e1.pointer, (const char)e2.pointer):: strcmp((const char)e1.pointer, (const char)e2.pointer);
1605	}, status);
1606	int32_t variantsStart = out.length();
1607	for (int32_t i = 0; i < variants.size(); i++) {
1608	out.append(SEP_CHAR'_', status)
1609	.append(static_cast<const char*>(variants.elementAt(i)),
1610	status);
1611	}
1612	T_CString_toUpperCaseT_CString_toUpperCase_77(out.data() + variantsStart);
1613	}
1614	if (notEmpty(extensions)) {
1615	CharString tmp("und_", status);
1616	tmp.append(extensions, status);
1617	Locale tmpLocale(tmp.data());
1618	// only support x extension inside CLDR for now.
1619	U_ASSERT(extensions[0] == 'x')(static_cast <bool> (extensions[0] == 'x') ? void (0) : __assert_fail ("extensions[0] == 'x'", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
1620	out.append(tmpLocale.getName() + 1, status);
1621	}
1622	return out;
1623	}
1624
1625	bool
1626	AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status)
1627	{
1628	data = AliasData::singleton(status);
1629	if (U_FAILURE(status)) {
1630	return false;
1631	}
1632	U_ASSERT(data != nullptr)(static_cast <bool> (data != nullptr) ? void (0) : __assert_fail ("data != nullptr", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
1633	out.clear();
1634	language = locale.getLanguage();
1635	if (!notEmpty(language)) {
1636	language = nullptr;
1637	}
1638	script = locale.getScript();
1639	if (!notEmpty(script)) {
1640	script = nullptr;
1641	}
1642	region = locale.getCountry();
1643	if (!notEmpty(region)) {
1644	region = nullptr;
1645	}
1646	const char* variantsStr = locale.getVariant();
1647	CharString variantsBuff(variantsStr, -1, status);
1648	if (!variantsBuff.isEmpty()) {
1649	if (U_FAILURE(status)) { return false; }
1650	char* start = variantsBuff.data();
1651	T_CString_toLowerCaseT_CString_toLowerCase_77(start);
1652	char* end;
1653	while ((end = uprv_strchr(start, SEP_CHAR):: strchr(start, '_')) != nullptr &&
1654	U_SUCCESS(status)) {
1655	*end = NULL_CHAR'\0'; // null terminate inside variantsBuff
1656	// do not add "" or duplicate data to variants
1657	if (*start && !variants.contains(start)) {
1658	variants.addElement(start, status);
1659	}
1660	start = end + 1;
1661	}
1662	// do not add "" or duplicate data to variants
1663	if (*start && !variants.contains(start)) {
1664	variants.addElement(start, status);
1665	}
1666	}
1667	if (U_FAILURE(status)) { return false; }
1668
1669	// Sort the variants
1670	variants.sort([](UElement e1, UElement e2) -> int32_t {
1671	return uprv_strcmp((const char)e1.pointer, (const char)e2.pointer):: strcmp((const char)e1.pointer, (const char)e2.pointer);
1672	}, status);
1673
1674	// A changed count to assert when loop too many times.
1675	int changed = 0;
1676	// A UVector to to hold CharString allocated by the replace* method
1677	// and freed when out of scope from his function.
1678	UVector stringsToBeFreed([](void obj) { delete static_cast<CharString>(obj); },
1679	nullptr, 10, status);
1680	while (U_SUCCESS(status)) {
1681	// Something wrong with the data cause looping here more than 10 times
1682	// already.
1683	U_ASSERT(changed < 5)(static_cast <bool> (changed < 5) ? void (0) : __assert_fail ("changed < 5", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
1684	// From observation of key in data/misc/metadata.txt
1685	// we know currently we only need to search in the following combination
1686	// of fields for type in languageAlias:
1687	// * lang_region_variant
1688	// * lang_region
1689	// * lang_variant
1690	// * lang
1691	// * und_variant
1692	// This assumption is ensured by the U_ASSERT in readLanguageAlias
1693	//
1694	// lang REGION variant
1695	if ( replaceLanguage(true, true, true, stringsToBeFreed, status) \|\|
1696	replaceLanguage(true, true, false, stringsToBeFreed, status) \|\|
1697	replaceLanguage(true, false, true, stringsToBeFreed, status) \|\|
1698	replaceLanguage(true, false, false, stringsToBeFreed, status) \|\|
1699	replaceLanguage(false,false, true, stringsToBeFreed, status) \|\|
1700	replaceTerritory(stringsToBeFreed, status) \|\|
1701	replaceScript(status) \|\|
1702	replaceVariant(status)) {
1703	// Some values in data is changed, try to match from the beginning
1704	// again.
1705	changed++;
1706	continue;
1707	}
1708	// Nothing changed. Break out.
1709	break;
1710	} // while(1)
1711
1712	if (U_FAILURE(status)) { return false; }
1713	// Nothing changed and we know the order of the variants are not change
1714	// because we have no variant or only one.
1715	const char* extensionsStr = locale_getKeywordsStartlocale_getKeywordsStart_77(locale.getName());
1716	if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
1717	return false;
1718	}
1719	outputToString(out, status);
1720	if (U_FAILURE(status)) {
1721	return false;
1722	}
1723	if (extensionsStr != nullptr) {
1724	changed = 0;
1725	Locale temp(locale);
1726	LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
1727	if (U_SUCCESS(status) && !iter.isNull()) {
1728	const char* key;
1729	while ((key = iter->next(nullptr, status)) != nullptr) {
1730	if (uprv_strcmp("sd", key):: strcmp("sd", key) == 0 \|\| uprv_strcmp("rg", key):: strcmp("rg", key) == 0 \|\|
1731	uprv_strcmp("t", key):: strcmp("t", key) == 0) {
1732	auto value = locale.getKeywordValue<CharString>(key, status);
1733	if (U_FAILURE(status)) {
1734	status = U_ZERO_ERROR;
1735	continue;
1736	}
1737	CharString replacement;
1738	if (uprv_strlen(key):: strlen(key) == 2) {
1739	if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
1740	changed++;
1741	temp.setKeywordValue(key, replacement.data(), status);
1742	}
1743	} else {
1744	U_ASSERT(uprv_strcmp(key, "t") == 0)(static_cast <bool> (:: strcmp(key, "t") == 0) ? void ( 0) : __assert_fail (":: strcmp(key, \"t\") == 0", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
1745	if (replaceTransformedExtensions(value, replacement, status)) {
1746	changed++;
1747	temp.setKeywordValue(key, replacement.data(), status);
1748	}
1749	}
1750	if (U_FAILURE(status)) {
1751	return false;
1752	}
1753	}
1754	}
1755	}
1756	if (changed != 0) {
1757	extensionsStr = locale_getKeywordsStartlocale_getKeywordsStart_77(temp.getName());
1758	}
1759	out.append(extensionsStr, status);
1760	}
1761	if (U_FAILURE(status)) {
1762	return false;
1763	}
1764	// If the tag is not changed, return.
1765	if (uprv_strcmp(out.data(), locale.getName()):: strcmp(out.data(), locale.getName()) == 0) {
1766	out.clear();
1767	return false;
1768	}
1769	return true;
1770	}
1771
1772	// Return true if the locale is changed during canonicalization.
1773	// The replaced value then will be put into out.
1774	bool
1775	canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
1776	{
1777	if (U_FAILURE(status)) { return false; }
1778	AliasReplacer replacer(status);
1779	return replacer.replace(locale, out, status);
1780	}
1781
1782	// Function to optimize for known cases without so we can skip the loading
1783	// of resources in the startup time until we really need it.
1784	bool
1785	isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
1786	{
1787	if (U_FAILURE(status)) { return false; }
1788
1789	if ( uprv_strcmp(locale, "c"):: strcmp(locale, "c") == 0 \|\|
1790	uprv_strcmp(locale, "en"):: strcmp(locale, "en") == 0 \|\|
1791	uprv_strcmp(locale, "en_US"):: strcmp(locale, "en_US") == 0) {
1792	return true;
1793	}
1794
1795	// common well-known Canonicalized.
1796	umtx_initOnce(gKnownCanonicalizedInitOnce,
1797	&loadKnownCanonicalized, status);
1798	if (U_FAILURE(status)) {
1799	return false;
1800	}
1801	U_ASSERT(gKnownCanonicalized != nullptr)(static_cast <bool> (gKnownCanonicalized != nullptr) ? void (0) : __assert_fail ("gKnownCanonicalized != nullptr", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
1802	return uhash_getiuhash_geti_77(gKnownCanonicalized, locale) != 0;
1803	}
1804
1805	} // namespace
1806
1807	U_NAMESPACE_END}
1808
1809	// Function for testing.
1810	U_EXPORT const char* const*
1811	ulocimp_getKnownCanonicalizedLocaleForTestulocimp_getKnownCanonicalizedLocaleForTest_77(int32_t& length)
1812	{
1813	U_NAMESPACE_USEusing namespace icu_77;
1814	length = UPRV_LENGTHOF(KNOWN_CANONICALIZED)(int32_t)(sizeof(KNOWN_CANONICALIZED)/sizeof((KNOWN_CANONICALIZED )[0]));
1815	return KNOWN_CANONICALIZED;
1816	}
1817
1818	// Function for testing.
1819	U_EXPORT bool
1820	ulocimp_isCanonicalizedLocaleForTestulocimp_isCanonicalizedLocaleForTest_77(const char* localeName)
1821	{
1822	U_NAMESPACE_USEusing namespace icu_77;
1823	Locale l(localeName);
1824	UErrorCode status = U_ZERO_ERROR;
1825	CharString temp;
1826	return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status);
1827	}
1828
1829	U_NAMESPACE_BEGINnamespace icu_77 {
1830
1831	Locale& Locale::init(const char* localeID, UBool canonicalize)
1832	{
1833	return localeID == nullptr ? *this = getDefault() : init(StringPiece{localeID}, canonicalize);
1834	}
1835
1836	/This function initializes a Locale from a C locale ID/
1837	Locale& Locale::init(StringPiece localeID, UBool canonicalize)
1838	{
1839	fIsBogus = false;
1840	/* Free our current storage */
1841	if ((baseName != fullName) && (baseName != fullNameBuffer)) {
1842	uprv_freeuprv_free_77(baseName);
1843	}
1844	baseName = nullptr;
1845	if(fullName != fullNameBuffer) {
1846	uprv_freeuprv_free_77(fullName);
1847	fullName = fullNameBuffer;
1848	}
1849
1850	// not a loop:
1851	// just an easy way to have a common error-exit
1852	// without goto and without another function
1853	do {
1854	char *separator;
1855	char *field[5] = {nullptr};
1856	int32_t fieldLen[5] = {0};
1857	int32_t fieldIdx;
1858	int32_t variantField;
1859	int32_t length;
1860	UErrorCode err;
1861
1862	/* preset all fields to empty */
1863	language[0] = script[0] = country[0] = 0;
1864
1865	const auto parse = [canonicalize](std::string_view localeID,
1866	char* name,
1867	int32_t nameCapacity,
1868	UErrorCode& status) {
1869	return ByteSinkUtil::viaByteSinkToTerminatedChars(
1870	name, nameCapacity,
1871	[&](ByteSink& sink, UErrorCode& status) {
1872	if (canonicalize) {
1873	ulocimp_canonicalizeulocimp_canonicalize_77(localeID, sink, status);
1874	} else {
1875	ulocimp_getNameulocimp_getName_77(localeID, sink, status);
1876	}
1877	},
1878	status);
1879	};
1880
1881	// "canonicalize" the locale ID to ICU/Java format
1882	err = U_ZERO_ERROR;
1883	length = parse(localeID, fullName, sizeof fullNameBuffer, err);
1884
1885	if (err == U_BUFFER_OVERFLOW_ERROR \|\| length >= static_cast<int32_t>(sizeof(fullNameBuffer))) {
1886	U_ASSERT(baseName == nullptr)(static_cast <bool> (baseName == nullptr) ? void (0) : __assert_fail ("baseName == nullptr", __builtin_FILE (), __builtin_LINE () , __extension__ __PRETTY_FUNCTION__));
1887	/Go to heap for the fullName if necessary/
1888	char* newFullName = static_cast<char>(uprv_mallocuprv_malloc_77(sizeof(char) (length + 1)));
1889	if (newFullName == nullptr) {
1890	break; // error: out of memory
1891	}
1892	fullName = newFullName;
1893	err = U_ZERO_ERROR;
1894	length = parse(localeID, fullName, length + 1, err);
1895	}
1896	if(U_FAILURE(err) \|\| err == U_STRING_NOT_TERMINATED_WARNING) {
1897	/* should never occur */
1898	break;
1899	}
1900
1901	variantBegin = length;
1902
1903	/* after uloc_getName/canonicalize() we know that only '_' are separators */
1904	/* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
1905	separator = field[0] = fullName;
	Value stored to 'separator' is never read
1906	fieldIdx = 1;
1907	char* at = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
1908	while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR):: strchr(field[fieldIdx-1], '_')) != nullptr &&
1909	fieldIdx < UPRV_LENGTHOF(field)(int32_t)(sizeof(field)/sizeof((field)[0]))-1 &&
1910	(at == nullptr \|\| separator < at)) {
1911	field[fieldIdx] = separator + 1;
1912	fieldLen[fieldIdx - 1] = static_cast<int32_t>(separator - field[fieldIdx - 1]);
1913	fieldIdx++;
1914	}
1915	// variant may contain @foo or .foo POSIX cruft; remove it
1916	separator = uprv_strchr(field[fieldIdx-1], '@'):: strchr(field[fieldIdx-1], '@');
1917	char* sep2 = uprv_strchr(field[fieldIdx-1], '.'):: strchr(field[fieldIdx-1], '.');
1918	if (separator!=nullptr \|\| sep2!=nullptr) {
1919	if (separator==nullptr \|\| (sep2!=nullptr && separator > sep2)) {
1920	separator = sep2;
1921	}
1922	fieldLen[fieldIdx - 1] = static_cast<int32_t>(separator - field[fieldIdx - 1]);
1923	} else {
1924	fieldLen[fieldIdx - 1] = length - static_cast<int32_t>(field[fieldIdx - 1] - fullName);
1925	}
1926
1927	if (fieldLen[0] >= static_cast<int32_t>(sizeof(language)))
1928	{
1929	break; // error: the language field is too long
1930	}
1931
1932	variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
1933	if (fieldLen[0] > 0) {
1934	/* We have a language */
1935	uprv_memcpy(language, fullName, fieldLen[0])do { clang diagnostic push clang diagnostic ignored "-Waddress" (static_cast <bool> (language != __null) ? void (0) : __assert_fail ("language != __null", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); (static_cast <bool > (fullName != __null) ? void (0) : __assert_fail ("fullName != __null" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); clang diagnostic pop :: memcpy(language, fullName, fieldLen [0]); } while (false);
1936	language[fieldLen[0]] = 0;
1937	}
1938	if (fieldLen[1] == 4 && uprv_isASCIILetteruprv_isASCIILetter_77(field[1][0]) &&
1939	uprv_isASCIILetteruprv_isASCIILetter_77(field[1][1]) && uprv_isASCIILetteruprv_isASCIILetter_77(field[1][2]) &&
1940	uprv_isASCIILetteruprv_isASCIILetter_77(field[1][3])) {
1941	/* We have at least a script */
1942	uprv_memcpy(script, field[1], fieldLen[1])do { clang diagnostic push clang diagnostic ignored "-Waddress" (static_cast <bool> (script != __null) ? void (0) : __assert_fail ("script != __null", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); (static_cast <bool> (field[1] != __null) ? void (0) : __assert_fail ("field[1] != __null", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); clang diagnostic pop :: memcpy(script, field[1], fieldLen[1]); } while (false);
1943	script[fieldLen[1]] = 0;
1944	variantField++;
1945	}
1946
1947	if (fieldLen[variantField] == 2 \|\| fieldLen[variantField] == 3) {
1948	/* We have a country */
1949	uprv_memcpy(country, field[variantField], fieldLen[variantField])do { clang diagnostic push clang diagnostic ignored "-Waddress" (static_cast <bool> (country != __null) ? void (0) : __assert_fail ("country != __null", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); (static_cast <bool> (field[variantField ] != __null) ? void (0) : __assert_fail ("field[variantField] != __null" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); clang diagnostic pop :: memcpy(country, field[variantField ], fieldLen[variantField]); } while (false);
1950	country[fieldLen[variantField]] = 0;
1951	variantField++;
1952	} else if (fieldLen[variantField] == 0) {
1953	variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
1954	}
1955
1956	if (fieldLen[variantField] > 0) {
1957	/* We have a variant */
1958	variantBegin = static_cast<int32_t>(field[variantField] - fullName);
1959	}
1960
1961	err = U_ZERO_ERROR;
1962	initBaseName(err);
1963	if (U_FAILURE(err)) {
1964	break;
1965	}
1966
1967	if (canonicalize) {
1968	if (!isKnownCanonicalizedLocale(fullName, err)) {
1969	CharString replaced;
1970	// Not sure it is already canonicalized
1971	if (canonicalizeLocale(*this, replaced, err)) {
1972	U_ASSERT(U_SUCCESS(err))(static_cast <bool> (U_SUCCESS(err)) ? void (0) : __assert_fail ("U_SUCCESS(err)", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__));
1973	// If need replacement, call init again.
1974	init(replaced.data(), false);
1975	}
1976	if (U_FAILURE(err)) {
1977	break;
1978	}
1979	}
1980	} // if (canonicalize) {
1981
1982	// successful end of init()
1983	return *this;
1984	} while(0); /loop doesn't iterate/
1985
1986	// when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
1987	setToBogus();
1988
1989	return *this;
1990	}
1991
1992	/*
1993	* Set up the base name.
1994	* If there are no key words, it's exactly the full name.
1995	* If key words exist, it's the full name truncated at the '@' character.
1996	* Need to set up both at init() and after setting a keyword.
1997	*/
1998	void
1999	Locale::initBaseName(UErrorCode &status) {
2000	if (U_FAILURE(status)) {
2001	return;
2002	}
2003	U_ASSERT(baseName==nullptr \|\| baseName==fullName)(static_cast <bool> (baseName==nullptr \|\| baseName==fullName ) ? void (0) : __assert_fail ("baseName==nullptr \|\| baseName==fullName" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
2004	const char *atPtr = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
2005	const char *eqPtr = uprv_strchr(fullName, '='):: strchr(fullName, '=');
2006	if (atPtr && eqPtr && atPtr < eqPtr) {
2007	// Key words exist.
2008	int32_t baseNameLength = static_cast<int32_t>(atPtr - fullName);
2009	char* newBaseName = static_cast<char*>(uprv_mallocuprv_malloc_77(baseNameLength + 1));
2010	if (newBaseName == nullptr) {
2011	status = U_MEMORY_ALLOCATION_ERROR;
2012	return;
2013	}
2014	baseName = newBaseName;
2015	uprv_strncpy(baseName, fullName, baseNameLength):: strncpy(baseName, fullName, baseNameLength);
2016	baseName[baseNameLength] = 0;
2017
2018	// The original computation of variantBegin leaves it equal to the length
2019	// of fullName if there is no variant. It should instead be
2020	// the length of the baseName.
2021	if (variantBegin > baseNameLength) {
2022	variantBegin = baseNameLength;
2023	}
2024	} else {
2025	baseName = fullName;
2026	}
2027	}
2028
2029
2030	int32_t
2031	Locale::hashCode() const
2032	{
2033	return ustr_hashCharsNustr_hashCharsN_77(fullName, static_cast<int32_t>(uprv_strlen(fullName):: strlen(fullName)));
2034	}
2035
2036	void
2037	Locale::setToBogus() {
2038	/* Free our current storage */
2039	if((baseName != fullName) && (baseName != fullNameBuffer)) {
2040	uprv_freeuprv_free_77(baseName);
2041	}
2042	baseName = nullptr;
2043	if(fullName != fullNameBuffer) {
2044	uprv_freeuprv_free_77(fullName);
2045	fullName = fullNameBuffer;
2046	}
2047	*fullNameBuffer = 0;
2048	*language = 0;
2049	*script = 0;
2050	*country = 0;
2051	fIsBogus = true;
2052	variantBegin = 0;
2053	}
2054
2055	const Locale& U_EXPORT2
2056	Locale::getDefault()
2057	{
2058	{
2059	Mutex lock(&gDefaultLocaleMutex);
2060	if (gDefaultLocale != nullptr) {
2061	return *gDefaultLocale;
2062	}
2063	}
2064	UErrorCode status = U_ZERO_ERROR;
2065	return *locale_set_default_internal(nullptr, status);
2066	}
2067
2068
2069
2070	void U_EXPORT2
2071	Locale::setDefault( const Locale& newLocale,
2072	UErrorCode& status)
2073	{
2074	if (U_FAILURE(status)) {
2075	return;
2076	}
2077
2078	/* Set the default from the full name string of the supplied locale.
2079	* This is a convenient way to access the default locale caching mechanisms.
2080	*/
2081	const char *localeID = newLocale.getName();
2082	locale_set_default_internal(localeID, status);
2083	}
2084
2085	void
2086	Locale::addLikelySubtags(UErrorCode& status) {
2087	if (U_FAILURE(status)) {
2088	return;
2089	}
2090
2091	CharString maximizedLocaleID = ulocimp_addLikelySubtagsulocimp_addLikelySubtags_77(fullName, status);
2092
2093	if (U_FAILURE(status)) {
2094	return;
2095	}
2096
2097	init(maximizedLocaleID.data(), /canonicalize=/false);
2098	if (isBogus()) {
2099	status = U_ILLEGAL_ARGUMENT_ERROR;
2100	}
2101	}
2102
2103	void
2104	Locale::minimizeSubtags(UErrorCode& status) {
2105	Locale::minimizeSubtags(false, status);
2106	}
2107	void
2108	Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
2109	if (U_FAILURE(status)) {
2110	return;
2111	}
2112
2113	CharString minimizedLocaleID = ulocimp_minimizeSubtagsulocimp_minimizeSubtags_77(fullName, favorScript, status);
2114
2115	if (U_FAILURE(status)) {
2116	return;
2117	}
2118
2119	init(minimizedLocaleID.data(), /canonicalize=/false);
2120	if (isBogus()) {
2121	status = U_ILLEGAL_ARGUMENT_ERROR;
2122	}
2123	}
2124
2125	void
2126	Locale::canonicalize(UErrorCode& status) {
2127	if (U_FAILURE(status)) {
2128	return;
2129	}
2130	if (isBogus()) {
2131	status = U_ILLEGAL_ARGUMENT_ERROR;
2132	return;
2133	}
2134	CharString uncanonicalized(fullName, status);
2135	if (U_FAILURE(status)) {
2136	return;
2137	}
2138	init(uncanonicalized.data(), /canonicalize=/true);
2139	if (isBogus()) {
2140	status = U_ILLEGAL_ARGUMENT_ERROR;
2141	}
2142	}
2143
2144	Locale U_EXPORT2
2145	Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
2146	{
2147	Locale result(Locale::eBOGUS);
2148
2149	if (U_FAILURE(status)) {
2150	return result;
2151	}
2152
2153	// If a BCP 47 language tag is passed as the language parameter to the
2154	// normal Locale constructor, it will actually fall back to invoking
2155	// uloc_forLanguageTag() to parse it if it somehow is able to detect that
2156	// the string actually is BCP 47. This works well for things like strings
2157	// using BCP 47 extensions, but it does not at all work for things like
2158	// legacy language tags (marked as “Type: grandfathered” in BCP 47,
2159	// e.g., "en-GB-oed") which are possible to also
2160	// interpret as ICU locale IDs and because of that won't trigger the BCP 47
2161	// parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
2162	// and then Locale::init(), instead of just calling the normal constructor.
2163
2164	int32_t parsedLength;
2165	CharString localeID = ulocimp_forLanguageTagulocimp_forLanguageTag_77(
2166	tag.data(),
2167	tag.length(),
2168	&parsedLength,
2169	status);
2170
2171	if (U_FAILURE(status)) {
2172	return result;
2173	}
2174
2175	if (parsedLength != tag.size()) {
2176	status = U_ILLEGAL_ARGUMENT_ERROR;
2177	return result;
2178	}
2179
2180	result.init(localeID.data(), /canonicalize=/false);
2181	if (result.isBogus()) {
2182	status = U_ILLEGAL_ARGUMENT_ERROR;
2183	}
2184	return result;
2185	}
2186
2187	void
2188	Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
2189	{
2190	if (U_FAILURE(status)) {
2191	return;
2192	}
2193
2194	if (fIsBogus) {
2195	status = U_ILLEGAL_ARGUMENT_ERROR;
2196	return;
2197	}
2198
2199	ulocimp_toLanguageTagulocimp_toLanguageTag_77(fullName, sink, /strict=/false, status);
2200	}
2201
2202	Locale U_EXPORT2
2203	Locale::createFromName (const char *name)
2204	{
2205	if (name) {
2206	Locale l("");
2207	l.init(name, false);
2208	return l;
2209	}
2210	else {
2211	return getDefault();
2212	}
2213	}
2214
2215	Locale U_EXPORT2
2216	Locale::createFromName(StringPiece name) {
2217	Locale loc("");
2218	loc.init(name, false);
2219	return loc;
2220	}
2221
2222	Locale U_EXPORT2
2223	Locale::createCanonical(const char* name) {
2224	Locale loc("");
2225	loc.init(name, true);
2226	return loc;
2227	}
2228
2229	const char *
2230	Locale::getISO3Language() const
2231	{
2232	return uloc_getISO3Languageuloc_getISO3Language_77(fullName);
2233	}
2234
2235
2236	const char *
2237	Locale::getISO3Country() const
2238	{
2239	return uloc_getISO3Countryuloc_getISO3Country_77(fullName);
2240	}
2241
2242	/**
2243	* Return the LCID value as specified in the "LocaleID" resource for this
2244	* locale. The LocaleID must be expressed as a hexadecimal number, from
2245	* one to four digits. If the LocaleID resource is not present, or is
2246	* in an incorrect format, 0 is returned. The LocaleID is for use in
2247	* Windows (it is an LCID), but is available on all platforms.
2248	*/
2249	uint32_t
2250	Locale::getLCID() const
2251	{
2252	return uloc_getLCIDuloc_getLCID_77(fullName);
2253	}
2254
2255	const char* const* U_EXPORT2 Locale::getISOCountries()
2256	{
2257	return uloc_getISOCountriesuloc_getISOCountries_77();
2258	}
2259
2260	const char* const* U_EXPORT2 Locale::getISOLanguages()
2261	{
2262	return uloc_getISOLanguagesuloc_getISOLanguages_77();
2263	}
2264
2265	// Set the locale's data based on a posix id.
2266	void Locale::setFromPOSIXID(const char *posixID)
2267	{
2268	init(posixID, true);
2269	}
2270
2271	const Locale & U_EXPORT2
2272	Locale::getRoot()
2273	{
2274	return getLocale(eROOT);
2275	}
2276
2277	const Locale & U_EXPORT2
2278	Locale::getEnglish()
2279	{
2280	return getLocale(eENGLISH);
2281	}
2282
2283	const Locale & U_EXPORT2
2284	Locale::getFrench()
2285	{
2286	return getLocale(eFRENCH);
2287	}
2288
2289	const Locale & U_EXPORT2
2290	Locale::getGerman()
2291	{
2292	return getLocale(eGERMAN);
2293	}
2294
2295	const Locale & U_EXPORT2
2296	Locale::getItalian()
2297	{
2298	return getLocale(eITALIAN);
2299	}
2300
2301	const Locale & U_EXPORT2
2302	Locale::getJapanese()
2303	{
2304	return getLocale(eJAPANESE);
2305	}
2306
2307	const Locale & U_EXPORT2
2308	Locale::getKorean()
2309	{
2310	return getLocale(eKOREAN);
2311	}
2312
2313	const Locale & U_EXPORT2
2314	Locale::getChinese()
2315	{
2316	return getLocale(eCHINESE);
2317	}
2318
2319	const Locale & U_EXPORT2
2320	Locale::getSimplifiedChinese()
2321	{
2322	return getLocale(eCHINA);
2323	}
2324
2325	const Locale & U_EXPORT2
2326	Locale::getTraditionalChinese()
2327	{
2328	return getLocale(eTAIWAN);
2329	}
2330
2331
2332	const Locale & U_EXPORT2
2333	Locale::getFrance()
2334	{
2335	return getLocale(eFRANCE);
2336	}
2337
2338	const Locale & U_EXPORT2
2339	Locale::getGermany()
2340	{
2341	return getLocale(eGERMANY);
2342	}
2343
2344	const Locale & U_EXPORT2
2345	Locale::getItaly()
2346	{
2347	return getLocale(eITALY);
2348	}
2349
2350	const Locale & U_EXPORT2
2351	Locale::getJapan()
2352	{
2353	return getLocale(eJAPAN);
2354	}
2355
2356	const Locale & U_EXPORT2
2357	Locale::getKorea()
2358	{
2359	return getLocale(eKOREA);
2360	}
2361
2362	const Locale & U_EXPORT2
2363	Locale::getChina()
2364	{
2365	return getLocale(eCHINA);
2366	}
2367
2368	const Locale & U_EXPORT2
2369	Locale::getPRC()
2370	{
2371	return getLocale(eCHINA);
2372	}
2373
2374	const Locale & U_EXPORT2
2375	Locale::getTaiwan()
2376	{
2377	return getLocale(eTAIWAN);
2378	}
2379
2380	const Locale & U_EXPORT2
2381	Locale::getUK()
2382	{
2383	return getLocale(eUK);
2384	}
2385
2386	const Locale & U_EXPORT2
2387	Locale::getUS()
2388	{
2389	return getLocale(eUS);
2390	}
2391
2392	const Locale & U_EXPORT2
2393	Locale::getCanada()
2394	{
2395	return getLocale(eCANADA);
2396	}
2397
2398	const Locale & U_EXPORT2
2399	Locale::getCanadaFrench()
2400	{
2401	return getLocale(eCANADA_FRENCH);
2402	}
2403
2404	const Locale &
2405	Locale::getLocale(int locid)
2406	{
2407	Locale *localeCache = getLocaleCache();
2408	U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0))(static_cast <bool> ((locid < eMAX_LOCALES)&& (locid>=0)) ? void (0) : __assert_fail ("(locid < eMAX_LOCALES)&&(locid>=0)" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ ));
2409	if (localeCache == nullptr) {
2410	// Failure allocating the locale cache.
2411	// The best we can do is return a nullptr reference.
2412	locid = 0;
2413	}
2414	return localeCache[locid]; /operating on nullptr/
2415	}
2416
2417	/*
2418	This function is defined this way in order to get around static
2419	initialization and static destruction.
2420	*/
2421	Locale *
2422	Locale::getLocaleCache()
2423	{
2424	UErrorCode status = U_ZERO_ERROR;
2425	umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
2426	return gLocaleCache;
2427	}
2428
2429	class KeywordEnumeration : public StringEnumeration {
2430	protected:
2431	CharString keywords;
2432	private:
2433	const char *current;
2434	static const char fgClassID;
2435
2436	public:
2437	static UClassID U_EXPORT2 getStaticClassID() { return (UClassID)&fgClassID; }
2438	virtual UClassID getDynamicClassID() const override { return getStaticClassID(); }
2439	public:
2440	KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
2441	: keywords(), current(keywords.data()) {
2442	if(U_SUCCESS(status) && keywordLen != 0) {
2443	if(keys == nullptr \|\| keywordLen < 0) {
2444	status = U_ILLEGAL_ARGUMENT_ERROR;
2445	} else {
2446	keywords.append(keys, keywordLen, status);
2447	current = keywords.data() + currentIndex;
2448	}
2449	}
2450	}
2451
2452	virtual ~KeywordEnumeration();
2453
2454	virtual StringEnumeration * clone() const override
2455	{
2456	UErrorCode status = U_ZERO_ERROR;
2457	return new KeywordEnumeration(
2458	keywords.data(), keywords.length(),
2459	static_cast<int32_t>(current - keywords.data()), status);
2460	}
2461
2462	virtual int32_t count(UErrorCode& status) const override {
2463	if (U_FAILURE(status)) { return 0; }
2464	const char *kw = keywords.data();
2465	int32_t result = 0;
2466	while(*kw) {
2467	result++;
2468	kw += uprv_strlen(kw):: strlen(kw)+1;
2469	}
2470	return result;
2471	}
2472
2473	virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
2474	const char* result;
2475	int32_t len;
2476	if(U_SUCCESS(status) && *current != 0) {
2477	result = current;
2478	len = static_cast<int32_t>(uprv_strlen(current):: strlen(current));
2479	current += len+1;
2480	if(resultLength != nullptr) {
2481	*resultLength = len;
2482	}
2483	} else {
2484	if(resultLength != nullptr) {
2485	*resultLength = 0;
2486	}
2487	result = nullptr;
2488	}
2489	return result;
2490	}
2491
2492	virtual const UnicodeString* snext(UErrorCode& status) override {
2493	if (U_FAILURE(status)) { return nullptr; }
2494	int32_t resultLength = 0;
2495	const char *s = next(&resultLength, status);
2496	return setChars(s, resultLength, status);
2497	}
2498
2499	virtual void reset(UErrorCode& status) override {
2500	if (U_FAILURE(status)) { return; }
2501	current = keywords.data();
2502	}
2503	};
2504
2505	const char KeywordEnumeration::fgClassID = '\0';
2506
2507	// Out-of-line virtual destructor to serve as the "key function".
2508	KeywordEnumeration::~KeywordEnumeration() = default;
2509
2510	// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
2511	// the next() method for each keyword before returning it.
2512	class UnicodeKeywordEnumeration : public KeywordEnumeration {
2513	public:
2514	using KeywordEnumeration::KeywordEnumeration;
2515	virtual ~UnicodeKeywordEnumeration();
2516
2517	virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
2518	const char* legacy_key = KeywordEnumeration::next(nullptr, status);
2519	while (U_SUCCESS(status) && legacy_key != nullptr) {
2520	const char* key = uloc_toUnicodeLocaleKeyuloc_toUnicodeLocaleKey_77(legacy_key);
2521	if (key != nullptr) {
2522	if (resultLength != nullptr) {
2523	*resultLength = static_cast<int32_t>(uprv_strlen(key):: strlen(key));
2524	}
2525	return key;
2526	}
2527	// Not a Unicode keyword, could be a t, x or other, continue to look at the next one.
2528	legacy_key = KeywordEnumeration::next(nullptr, status);
2529	}
2530	if (resultLength != nullptr) *resultLength = 0;
2531	return nullptr;
2532	}
2533	virtual int32_t count(UErrorCode& status) const override {
2534	if (U_FAILURE(status)) { return 0; }
2535	const char *kw = keywords.data();
2536	int32_t result = 0;
2537	while(*kw) {
2538	if (uloc_toUnicodeLocaleKeyuloc_toUnicodeLocaleKey_77(kw) != nullptr) {
2539	result++;
2540	}
2541	kw += uprv_strlen(kw):: strlen(kw)+1;
2542	}
2543	return result;
2544	}
2545	};
2546
2547	// Out-of-line virtual destructor to serve as the "key function".
2548	UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
2549
2550	StringEnumeration *
2551	Locale::createKeywords(UErrorCode &status) const
2552	{
2553	StringEnumeration *result = nullptr;
2554
2555	if (U_FAILURE(status)) {
2556	return result;
2557	}
2558
2559	const char* variantStart = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
2560	const char* assignment = uprv_strchr(fullName, '='):: strchr(fullName, '=');
2561	if(variantStart) {
2562	if(assignment > variantStart) {
2563	CharString keywords = ulocimp_getKeywordsulocimp_getKeywords_77(variantStart + 1, '@', false, status);
2564	if (U_SUCCESS(status) && !keywords.isEmpty()) {
2565	result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status);
2566	if (!result) {
2567	status = U_MEMORY_ALLOCATION_ERROR;
2568	}
2569	}
2570	} else {
2571	status = U_INVALID_FORMAT_ERROR;
2572	}
2573	}
2574	return result;
2575	}
2576
2577	StringEnumeration *
2578	Locale::createUnicodeKeywords(UErrorCode &status) const
2579	{
2580	StringEnumeration *result = nullptr;
2581
2582	if (U_FAILURE(status)) {
2583	return result;
2584	}
2585
2586	const char* variantStart = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
2587	const char* assignment = uprv_strchr(fullName, '='):: strchr(fullName, '=');
2588	if(variantStart) {
2589	if(assignment > variantStart) {
2590	CharString keywords = ulocimp_getKeywordsulocimp_getKeywords_77(variantStart + 1, '@', false, status);
2591	if (U_SUCCESS(status) && !keywords.isEmpty()) {
2592	result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status);
2593	if (!result) {
2594	status = U_MEMORY_ALLOCATION_ERROR;
2595	}
2596	}
2597	} else {
2598	status = U_INVALID_FORMAT_ERROR;
2599	}
2600	}
2601	return result;
2602	}
2603
2604	int32_t
2605	Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
2606	{
2607	return uloc_getKeywordValueuloc_getKeywordValue_77(fullName, keywordName, buffer, bufLen, &status);
2608	}
2609
2610	void
2611	Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
2612	if (U_FAILURE(status)) {
2613	return;
2614	}
2615
2616	if (fIsBogus) {
2617	status = U_ILLEGAL_ARGUMENT_ERROR;
2618	return;
2619	}
2620
2621	ulocimp_getKeywordValueulocimp_getKeywordValue_77(fullName, keywordName, sink, status);
2622	}
2623
2624	void
2625	Locale::getUnicodeKeywordValue(StringPiece keywordName,
2626	ByteSink& sink,
2627	UErrorCode& status) const {
2628	if (U_FAILURE(status)) {
2629	return;
2630	}
2631
2632	std::optional<std::string_view> legacy_key = ulocimp_toLegacyKeyWithFallbackulocimp_toLegacyKeyWithFallback_77(keywordName);
2633	if (!legacy_key.has_value()) {
2634	status = U_ILLEGAL_ARGUMENT_ERROR;
2635	return;
2636	}
2637
2638	auto legacy_value = getKeywordValue<CharString>(*legacy_key, status);
2639
2640	if (U_FAILURE(status)) {
2641	return;
2642	}
2643
2644	std::optional<std::string_view> unicode_value =
2645	ulocimp_toBcpTypeWithFallbackulocimp_toBcpTypeWithFallback_77(keywordName, legacy_value.toStringPiece());
2646	if (!unicode_value.has_value()) {
2647	status = U_ILLEGAL_ARGUMENT_ERROR;
2648	return;
2649	}
2650
2651	sink.Append(unicode_value->data(), static_cast<int32_t>(unicode_value->size()));
2652	}
2653
2654	void
2655	Locale::setKeywordValue(StringPiece keywordName,
2656	StringPiece keywordValue,
2657	UErrorCode& status) {
2658	if (U_FAILURE(status)) { return; }
2659	if (keywordName.empty()) {
2660	status = U_ILLEGAL_ARGUMENT_ERROR;
2661	return;
2662	}
2663	if (status == U_STRING_NOT_TERMINATED_WARNING) {
2664	status = U_ZERO_ERROR;
2665	}
2666
2667	int32_t length = static_cast<int32_t>(uprv_strlen(fullName):: strlen(fullName));
2668	int32_t capacity = fullName == fullNameBuffer ? ULOC_FULLNAME_CAPACITY157 : length + 1;
2669
2670	const char* start = locale_getKeywordsStartlocale_getKeywordsStart_77(fullName);
2671	int32_t offset = start == nullptr ? length : start - fullName;
2672
2673	for (;;) {
2674	// Remove -1 from the capacity so that this function can guarantee NUL termination.
2675	CheckedArrayByteSink sink(fullName + offset, capacity - offset - 1);
2676
2677	int32_t reslen = ulocimp_setKeywordValueulocimp_setKeywordValue_77(
2678	{fullName + offset, static_cast<std::string_view::size_type>(length - offset)},
2679	keywordName,
2680	keywordValue,
2681	sink,
2682	status);
2683
2684	if (status == U_BUFFER_OVERFLOW_ERROR) {
2685	capacity = reslen + offset + 1;
2686	char* newFullName = static_cast<char*>(uprv_mallocuprv_malloc_77(capacity));
2687	if (newFullName == nullptr) {
2688	status = U_MEMORY_ALLOCATION_ERROR;
2689	return;
2690	}
2691	uprv_memcpy(newFullName, fullName, length + 1)do { clang diagnostic push clang diagnostic ignored "-Waddress" (static_cast <bool> (newFullName != __null) ? void (0 ) : __assert_fail ("newFullName != __null", __builtin_FILE () , __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); (static_cast <bool> (fullName != __null) ? void (0) : __assert_fail ("fullName != __null", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); clang diagnostic pop :: memcpy(newFullName, fullName, length + 1); } while (false);
2692	if (fullName != fullNameBuffer) {
2693	if (baseName == fullName) {
2694	baseName = newFullName; // baseName should not point to freed memory.
2695	}
2696	// if fullName is already on the heap, need to free it.
2697	uprv_freeuprv_free_77(fullName);
2698	}
2699	fullName = newFullName;
2700	status = U_ZERO_ERROR;
2701	continue;
2702	}
2703
2704	if (U_FAILURE(status)) { return; }
2705	u_terminateCharsu_terminateChars_77(fullName, capacity, reslen + offset, &status);
2706	break;
2707	}
2708
2709	if (baseName == fullName) {
2710	// May have added the first keyword, meaning that the fullName is no longer also the baseName.
2711	initBaseName(status);
2712	}
2713	}
2714
2715	void
2716	Locale::setUnicodeKeywordValue(StringPiece keywordName,
2717	StringPiece keywordValue,
2718	UErrorCode& status) {
2719	if (U_FAILURE(status)) {
2720	return;
2721	}
2722
2723	std::optional<std::string_view> legacy_key = ulocimp_toLegacyKeyWithFallbackulocimp_toLegacyKeyWithFallback_77(keywordName);
2724	if (!legacy_key.has_value()) {
2725	status = U_ILLEGAL_ARGUMENT_ERROR;
2726	return;
2727	}
2728
2729	std::string_view value;
2730
2731	if (!keywordValue.empty()) {
2732	std::optional<std::string_view> legacy_value =
2733	ulocimp_toLegacyTypeWithFallbackulocimp_toLegacyTypeWithFallback_77(keywordName, keywordValue);
2734	if (!legacy_value.has_value()) {
2735	status = U_ILLEGAL_ARGUMENT_ERROR;
2736	return;
2737	}
2738	value = *legacy_value;
2739	}
2740
2741	setKeywordValue(*legacy_key, value, status);
2742	}
2743
2744	const char *
2745	Locale::getBaseName() const {
2746	return baseName;
2747	}
2748
2749	Locale::Iterator::~Iterator() = default;
2750
2751	//eof
2752	U_NAMESPACE_END}