Bug Summary

File:root/firefox-clang/intl/icu/source/common/locid.cpp
Warning:line 1905, column 9
Value stored to 'separator' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name locid.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/config/gcc_hidden.h -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -D U_COMMON_IMPLEMENTATION -D _LIBCPP_DISABLE_DEPRECATION_WARNINGS -D U_USING_ICU_NAMESPACE=0 -D U_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -D U_HIDE_OBSOLETE_UTF_OLD_H=1 -D UCONFIG_NO_LEGACY_CONVERSION -D UCONFIG_NO_TRANSLITERATION -D UCONFIG_NO_REGULAR_EXPRESSIONS -D UCONFIG_NO_BREAK_ITERATION -D UCONFIG_NO_IDNA -D UCONFIG_NO_MF2 -D U_CHARSET_IS_UTF8 -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D U_ENABLE_DYLOAD=0 -D U_DEBUG=1 -I 
/root/firefox-clang/config/external/icu/common -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -I /root/firefox-clang/intl/icu/source/i18n -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=pessimizing-move -Wno-error=large-by-value-copy=128 -Wno-error=implicit-int-float-conversion -Wno-error=thread-safety-analysis -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-comma -Wno-implicit-const-int-float-conversion -Wno-macro-redefined -Wno-microsoft-include -Wno-tautological-unsigned-enum-zero-compare -Wno-unreachable-code-loop-increment -Wno-unreachable-code-return -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fno-sized-deallocation -fno-aligned-allocation 
-vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-27-100320-3286336-1 -x c++ /root/firefox-clang/intl/icu/source/common/locid.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8*
9* File locid.cpp
10*
11* Created by: Richard Gillam
12*
13* Modification History:
14*
15* Date Name Description
16* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
17* methods to get and set it.
18* 04/02/97 aliu Made operator!= inline; fixed return value
19* of getName().
20* 04/15/97 aliu Cleanup for AIX/Win32.
21* 04/24/97 aliu Numerous changes per code review.
22* 08/18/98 stephen Changed getDisplayName()
23* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
24* Added getISOCountries(), getISOLanguages(),
25* getLanguagesForCountry()
26* 03/16/99 bertrand rehaul.
27* 07/21/99 stephen Added U_CFUNC setDefault
28* 11/09/99 weiv Added const char * getName() const;
29* 04/12/00 srl removing unicodestring api's and cached hash code
30* 08/10/01 grhoten Change the static Locales to accessor functions
31******************************************************************************
32*/
33
34#include <optional>
35#include <string_view>
36#include <utility>
37
38#include "unicode/bytestream.h"
39#include "unicode/locid.h"
40#include "unicode/localebuilder.h"
41#include "unicode/strenum.h"
42#include "unicode/stringpiece.h"
43#include "unicode/uloc.h"
44#include "unicode/ures.h"
45
46#include "bytesinkutil.h"
47#include "charstr.h"
48#include "charstrmap.h"
49#include "cmemory.h"
50#include "cstring.h"
51#include "mutex.h"
52#include "putilimp.h"
53#include "uassert.h"
54#include "ucln_cmn.h"
55#include "uhash.h"
56#include "ulocimp.h"
57#include "umutex.h"
58#include "uniquecharstr.h"
59#include "ustr_imp.h"
60#include "uvector.h"
61
62U_NAMESPACE_BEGINnamespace icu_77 {
63
64static Locale *gLocaleCache = nullptr;
65static UInitOnce gLocaleCacheInitOnce {};
66
67// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
68static UMutex gDefaultLocaleMutex;
69static UHashtable *gDefaultLocalesHashT = nullptr;
70static Locale *gDefaultLocale = nullptr;
71
72/**
73 * \def ULOC_STRING_LIMIT
74 * strings beyond this value crash in CharString
75 */
76#define ULOC_STRING_LIMIT357913941 357913941
77
78U_NAMESPACE_END}
79
// Slot of each predefined Locale inside gLocaleCache. The enumerators are
// consecutive starting at 0, so eMAX_LOCALES is the number of slots.
enum ELocalePos {
    // Language-only locales.
    eENGLISH,
    eFRENCH,
    eGERMAN,
    eITALIAN,
    eJAPANESE,
    eKOREAN,
    eCHINESE,

    // Language + country locales.
    eFRANCE,
    eGERMANY,
    eITALY,
    eJAPAN,
    eKOREA,
    eCHINA,         /* Alias for PRC */
    eTAIWAN,
    eUK,
    eUS,
    eCANADA,
    eCANADA_FRENCH,
    eROOT,

    //eDEFAULT,
    eMAX_LOCALES    // count of the entries above; must stay last
};
106
107namespace {
108
109//
110// Deleter function for Locales owned by the default Locale hash table/
111//
112void U_CALLCONV
113deleteLocale(void *obj) {
114 delete static_cast<icu::Locale*>(obj);
115}
116
117UBool U_CALLCONV locale_cleanup()
118{
119 U_NAMESPACE_USEusing namespace icu_77;
120
121 delete [] gLocaleCache;
122 gLocaleCache = nullptr;
123 gLocaleCacheInitOnce.reset();
124
125 if (gDefaultLocalesHashT) {
126 uhash_closeuhash_close_77(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
127 gDefaultLocalesHashT = nullptr;
128 }
129 gDefaultLocale = nullptr;
130 return true;
131}
132
133void U_CALLCONV locale_init(UErrorCode &status) {
134 U_NAMESPACE_USEusing namespace icu_77;
135
136 U_ASSERT(gLocaleCache == nullptr)(static_cast <bool> (gLocaleCache == nullptr) ? void (0
) : __assert_fail ("gLocaleCache == nullptr", __builtin_FILE (
), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
137 gLocaleCache = new Locale[static_cast<int>(eMAX_LOCALES)];
138 if (gLocaleCache == nullptr) {
139 status = U_MEMORY_ALLOCATION_ERROR;
140 return;
141 }
142 ucln_common_registerCleanupucln_common_registerCleanup_77(UCLN_COMMON_LOCALE, locale_cleanup);
143 gLocaleCache[eROOT] = Locale("");
144 gLocaleCache[eENGLISH] = Locale("en");
145 gLocaleCache[eFRENCH] = Locale("fr");
146 gLocaleCache[eGERMAN] = Locale("de");
147 gLocaleCache[eITALIAN] = Locale("it");
148 gLocaleCache[eJAPANESE] = Locale("ja");
149 gLocaleCache[eKOREAN] = Locale("ko");
150 gLocaleCache[eCHINESE] = Locale("zh");
151 gLocaleCache[eFRANCE] = Locale("fr", "FR");
152 gLocaleCache[eGERMANY] = Locale("de", "DE");
153 gLocaleCache[eITALY] = Locale("it", "IT");
154 gLocaleCache[eJAPAN] = Locale("ja", "JP");
155 gLocaleCache[eKOREA] = Locale("ko", "KR");
156 gLocaleCache[eCHINA] = Locale("zh", "CN");
157 gLocaleCache[eTAIWAN] = Locale("zh", "TW");
158 gLocaleCache[eUK] = Locale("en", "GB");
159 gLocaleCache[eUS] = Locale("en", "US");
160 gLocaleCache[eCANADA] = Locale("en", "CA");
161 gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
162}
163
164} // namespace
165
166U_NAMESPACE_BEGINnamespace icu_77 {
167
168Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
169 // Synchronize this entire function.
170 Mutex lock(&gDefaultLocaleMutex);
171
172 UBool canonicalize = false;
173
174 // If given a nullptr string for the locale id, grab the default
175 // name from the system.
176 // (Different from most other locale APIs, where a null name means use
177 // the current ICU default locale.)
178 if (id == nullptr) {
179 id = uprv_getDefaultLocaleIDuprv_getDefaultLocaleID_77(); // This function not thread safe? TODO: verify.
180 canonicalize = true; // always canonicalize host ID
181 }
182
183 CharString localeNameBuf =
184 canonicalize ? ulocimp_canonicalizeulocimp_canonicalize_77(id, status) : ulocimp_getNameulocimp_getName_77(id, status);
185
186 if (U_FAILURE(status)) {
187 return gDefaultLocale;
188 }
189
190 if (gDefaultLocalesHashT == nullptr) {
191 gDefaultLocalesHashT = uhash_openuhash_open_77(uhash_hashCharsuhash_hashChars_77, uhash_compareCharsuhash_compareChars_77, nullptr, &status);
192 if (U_FAILURE(status)) {
193 return gDefaultLocale;
194 }
195 uhash_setValueDeleteruhash_setValueDeleter_77(gDefaultLocalesHashT, deleteLocale);
196 ucln_common_registerCleanupucln_common_registerCleanup_77(UCLN_COMMON_LOCALE, locale_cleanup);
197 }
198
199 Locale* newDefault = static_cast<Locale*>(uhash_getuhash_get_77(gDefaultLocalesHashT, localeNameBuf.data()));
200 if (newDefault == nullptr) {
201 newDefault = new Locale(Locale::eBOGUS);
202 if (newDefault == nullptr) {
203 status = U_MEMORY_ALLOCATION_ERROR;
204 return gDefaultLocale;
205 }
206 newDefault->init(localeNameBuf.data(), false);
207 uhash_putuhash_put_77(gDefaultLocalesHashT, const_cast<char*>(newDefault->getName()), newDefault, &status);
208 if (U_FAILURE(status)) {
209 return gDefaultLocale;
210 }
211 }
212 gDefaultLocale = newDefault;
213 return gDefaultLocale;
214}
215
216U_NAMESPACE_END}
217
218/* sfb 07/21/99 */
219U_CFUNCextern "C" void
220locale_set_defaultlocale_set_default_77(const char *id)
221{
222 U_NAMESPACE_USEusing namespace icu_77;
223 UErrorCode status = U_ZERO_ERROR;
224 locale_set_default_internal(id, status);
225}
226/* end */
227
228U_CFUNCextern "C" const char *
229locale_get_defaultlocale_get_default_77()
230{
231 U_NAMESPACE_USEusing namespace icu_77;
232 return Locale::getDefault().getName();
233}
234
235
236U_NAMESPACE_BEGINnamespace icu_77 {
237
238UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)UClassID Locale::getStaticClassID() { static char classID = 0
; return (UClassID)&classID; } UClassID Locale::getDynamicClassID
() const { return Locale::getStaticClassID(); }
239
240/*Character separating the posix id fields*/
241// '_'
242// In the platform codepage.
243#define SEP_CHAR'_' '_'
244#define NULL_CHAR'\0' '\0'
245
246Locale::~Locale()
247{
248 if ((baseName != fullName) && (baseName != fullNameBuffer)) {
249 uprv_freeuprv_free_77(baseName);
250 }
251 baseName = nullptr;
252 /*if fullName is on the heap, we free it*/
253 if (fullName != fullNameBuffer)
254 {
255 uprv_freeuprv_free_77(fullName);
256 fullName = nullptr;
257 }
258}
259
260Locale::Locale()
261 : UObject(), fullName(fullNameBuffer), baseName(nullptr)
262{
263 init(nullptr, false);
264}
265
266/*
267 * Internal constructor to allow construction of a locale object with
268 * NO side effects. (Default constructor tries to get
269 * the default locale.)
270 */
271Locale::Locale(Locale::ELocaleType)
272 : UObject(), fullName(fullNameBuffer), baseName(nullptr)
273{
274 setToBogus();
275}
276
277
278Locale::Locale( const char * newLanguage,
279 const char * newCountry,
280 const char * newVariant,
281 const char * newKeywords)
282 : UObject(), fullName(fullNameBuffer), baseName(nullptr)
283{
284 if( (newLanguage==nullptr) && (newCountry == nullptr) && (newVariant == nullptr) )
285 {
286 init(nullptr, false); /* shortcut */
287 }
288 else
289 {
290 UErrorCode status = U_ZERO_ERROR;
291 int32_t lsize = 0;
292 int32_t csize = 0;
293 int32_t vsize = 0;
294 int32_t ksize = 0;
295
296 // Check the sizes of the input strings.
297
298 // Language
299 if ( newLanguage != nullptr )
300 {
301 lsize = static_cast<int32_t>(uprv_strlen(newLanguage):: strlen(newLanguage));
302 if ( lsize < 0 || lsize > ULOC_STRING_LIMIT357913941 ) { // int32 wrap
303 setToBogus();
304 return;
305 }
306 }
307
308 CharString togo(newLanguage, lsize, status); // start with newLanguage
309
310 // _Country
311 if ( newCountry != nullptr )
312 {
313 csize = static_cast<int32_t>(uprv_strlen(newCountry):: strlen(newCountry));
314 if ( csize < 0 || csize > ULOC_STRING_LIMIT357913941 ) { // int32 wrap
315 setToBogus();
316 return;
317 }
318 }
319
320 // _Variant
321 if ( newVariant != nullptr )
322 {
323 // remove leading _'s
324 while(newVariant[0] == SEP_CHAR'_')
325 {
326 newVariant++;
327 }
328
329 // remove trailing _'s
330 vsize = static_cast<int32_t>(uprv_strlen(newVariant):: strlen(newVariant));
331 if ( vsize < 0 || vsize > ULOC_STRING_LIMIT357913941 ) { // int32 wrap
332 setToBogus();
333 return;
334 }
335 while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR'_') )
336 {
337 vsize--;
338 }
339 }
340
341 if ( newKeywords != nullptr)
342 {
343 ksize = static_cast<int32_t>(uprv_strlen(newKeywords):: strlen(newKeywords));
344 if ( ksize < 0 || ksize > ULOC_STRING_LIMIT357913941 ) {
345 setToBogus();
346 return;
347 }
348 }
349
350 // We've checked the input sizes, now build up the full locale string..
351
352 // newLanguage is already copied
353
354 if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
355 { // ^
356 togo.append(SEP_CHAR'_', status);
357 }
358
359 if ( csize != 0 )
360 {
361 togo.append(newCountry, status);
362 }
363
364 if ( vsize != 0)
365 {
366 togo.append(SEP_CHAR'_', status)
367 .append(newVariant, vsize, status);
368 }
369
370 if ( ksize != 0)
371 {
372 if (uprv_strchr(newKeywords, '='):: strchr(newKeywords, '=')) {
373 togo.append('@', status); /* keyword parsing */
374 }
375 else {
376 togo.append('_', status); /* Variant parsing with a script */
377 if ( vsize == 0) {
378 togo.append('_', status); /* No country found */
379 }
380 }
381 togo.append(newKeywords, status);
382 }
383
384 if (U_FAILURE(status)) {
385 // Something went wrong with appending, etc.
386 setToBogus();
387 return;
388 }
389 // Parse it, because for example 'language' might really be a complete
390 // string.
391 init(togo.data(), false);
392 }
393}
394
395Locale::Locale(const Locale &other)
396 : UObject(other), fullName(fullNameBuffer), baseName(nullptr)
397{
398 *this = other;
399}
400
401Locale::Locale(Locale&& other) noexcept
402 : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
403 *this = std::move(other);
404}
405
406Locale& Locale::operator=(const Locale& other) {
407 if (this == &other) {
408 return *this;
409 }
410
411 setToBogus();
412
413 if (other.fullName == other.fullNameBuffer) {
414 uprv_strcpy(fullNameBuffer, other.fullNameBuffer):: strcpy(fullNameBuffer, other.fullNameBuffer);
415 } else if (other.fullName == nullptr) {
416 fullName = nullptr;
417 } else {
418 fullName = uprv_strdupuprv_strdup_77(other.fullName);
419 if (fullName == nullptr) return *this;
420 }
421
422 if (other.baseName == other.fullName) {
423 baseName = fullName;
424 } else if (other.baseName != nullptr) {
425 baseName = uprv_strdupuprv_strdup_77(other.baseName);
426 if (baseName == nullptr) return *this;
427 }
428
429 uprv_strcpy(language, other.language):: strcpy(language, other.language);
430 uprv_strcpy(script, other.script):: strcpy(script, other.script);
431 uprv_strcpy(country, other.country):: strcpy(country, other.country);
432
433 variantBegin = other.variantBegin;
434 fIsBogus = other.fIsBogus;
435
436 return *this;
437}
438
439Locale& Locale::operator=(Locale&& other) noexcept {
440 if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_freeuprv_free_77(baseName);
441 if (fullName != fullNameBuffer) uprv_freeuprv_free_77(fullName);
442
443 if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) {
444 uprv_strcpy(fullNameBuffer, other.fullNameBuffer):: strcpy(fullNameBuffer, other.fullNameBuffer);
445 }
446 if (other.fullName == other.fullNameBuffer) {
447 fullName = fullNameBuffer;
448 } else {
449 fullName = other.fullName;
450 }
451
452 if (other.baseName == other.fullNameBuffer) {
453 baseName = fullNameBuffer;
454 } else if (other.baseName == other.fullName) {
455 baseName = fullName;
456 } else {
457 baseName = other.baseName;
458 }
459
460 uprv_strcpy(language, other.language):: strcpy(language, other.language);
461 uprv_strcpy(script, other.script):: strcpy(script, other.script);
462 uprv_strcpy(country, other.country):: strcpy(country, other.country);
463
464 variantBegin = other.variantBegin;
465 fIsBogus = other.fIsBogus;
466
467 other.baseName = other.fullName = other.fullNameBuffer;
468
469 return *this;
470}
471
472Locale *
473Locale::clone() const {
474 return new Locale(*this);
475}
476
477bool
478Locale::operator==( const Locale& other) const
479{
480 return (uprv_strcmp(other.fullName, fullName):: strcmp(other.fullName, fullName) == 0);
481}
482
483namespace {
484
485UInitOnce gKnownCanonicalizedInitOnce {};
486UHashtable *gKnownCanonicalized = nullptr;
487
// Commonly used locale IDs that are known to be in canonical form already.
// After the leading "c" entry, the list is grouped by first letter.
constexpr const char* KNOWN_CANONICALIZED[] = {
    "c",
    "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
    "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA",
    "ca", "ca_ES", "cs", "cs_CZ", "cy", "cy_GB",
    "da", "da_DK", "de", "de_DE",
    "el", "el_GR", "en", "en_GB", "en_US", "es", "es_419", "es_ES",
    "et", "et_EE", "eu", "eu_ES",
    "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
    "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN",
    "he", "he_IL", "hi", "hi_IN", "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM",
    "id", "id_ID", "is", "is_IS", "it", "it_IT",
    "ja", "ja_JP", "jv", "jv_ID",
    "ka", "ka_GE", "kk", "kk_KZ", "km", "km_KH", "kn", "kn_IN",
    "ko", "ko_KR", "ky", "ky_KG",
    "lo", "lo_LA", "lt", "lt_LT", "lv", "lv_LV",
    "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN", "mr", "mr_IN",
    "ms", "ms_MY", "my", "my_MM",
    "nb", "nb_NO", "ne", "ne_NP", "nl", "nl_NL", "no",
    "or", "or_IN",
    "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF", "pt", "pt_BR", "pt_PT",
    "ro", "ro_RO", "ru", "ru_RU",
    "sd", "sd_IN", "si", "si_LK", "sk", "sk_SK", "sl", "sl_SI",
    "so", "so_SO", "sq", "sq_AL",
    "sr", "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ",
    "ta", "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR",
    "uk", "uk_UA", "ur", "ur_PK", "uz", "uz_UZ",
    "vi", "vi_VN",
    "yue", "yue_Hant", "yue_Hant_HK", "yue_HK",
    "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant", "zh_Hant_TW", "zh_TW",
    "zu", "zu_ZA"
};
511
512UBool U_CALLCONV cleanupKnownCanonicalized() {
513 gKnownCanonicalizedInitOnce.reset();
514 if (gKnownCanonicalized) { uhash_closeuhash_close_77(gKnownCanonicalized); }
515 return true;
516}
517
518void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
519 ucln_common_registerCleanupucln_common_registerCleanup_77(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
520 cleanupKnownCanonicalized);
521 LocalUHashtablePointer newKnownCanonicalizedMap(
522 uhash_openuhash_open_77(uhash_hashCharsuhash_hashChars_77, uhash_compareCharsuhash_compareChars_77, nullptr, &status));
523 for (int32_t i = 0;
524 U_SUCCESS(status) && i < UPRV_LENGTHOF(KNOWN_CANONICALIZED)(int32_t)(sizeof(KNOWN_CANONICALIZED)/sizeof((KNOWN_CANONICALIZED
)[0]))
;
525 i++) {
526 uhash_putiuhash_puti_77(newKnownCanonicalizedMap.getAlias(),
527 (void*)KNOWN_CANONICALIZED[i],
528 1, &status);
529 }
530 if (U_FAILURE(status)) {
531 return;
532 }
533
534 gKnownCanonicalized = newKnownCanonicalizedMap.orphan();
535}
536
537class AliasData;
538
539/**
540 * A Builder class to build the alias data.
541 */
542class AliasDataBuilder {
543public:
544 AliasDataBuilder() {
545 }
546
547 // Build the AliasData from resource.
548 AliasData* build(UErrorCode &status);
549
550private:
551 void readAlias(UResourceBundle* alias,
552 UniqueCharStrings* strings,
553 LocalMemory<const char*>& types,
554 LocalMemory<int32_t>& replacementIndexes,
555 int32_t &length,
556 void (*checkType)(const char* type),
557 void (*checkReplacement)(const UChar* replacement),
558 UErrorCode &status);
559
560 // Read the languageAlias data from alias to
561 // strings+types+replacementIndexes
562 // The number of record will be stored into length.
563 // Allocate length items for types, to store the type field.
564 // Allocate length items for replacementIndexes,
565 // to store the index in the strings for the replacement script.
566 void readLanguageAlias(UResourceBundle* alias,
567 UniqueCharStrings* strings,
568 LocalMemory<const char*>& types,
569 LocalMemory<int32_t>& replacementIndexes,
570 int32_t &length,
571 UErrorCode &status);
572
573 // Read the scriptAlias data from alias to
574 // strings+types+replacementIndexes
575 // Allocate length items for types, to store the type field.
576 // Allocate length items for replacementIndexes,
577 // to store the index in the strings for the replacement script.
578 void readScriptAlias(UResourceBundle* alias,
579 UniqueCharStrings* strings,
580 LocalMemory<const char*>& types,
581 LocalMemory<int32_t>& replacementIndexes,
582 int32_t &length, UErrorCode &status);
583
584 // Read the territoryAlias data from alias to
585 // strings+types+replacementIndexes
586 // Allocate length items for types, to store the type field.
587 // Allocate length items for replacementIndexes,
588 // to store the index in the strings for the replacement script.
589 void readTerritoryAlias(UResourceBundle* alias,
590 UniqueCharStrings* strings,
591 LocalMemory<const char*>& types,
592 LocalMemory<int32_t>& replacementIndexes,
593 int32_t &length, UErrorCode &status);
594
595 // Read the variantAlias data from alias to
596 // strings+types+replacementIndexes
597 // Allocate length items for types, to store the type field.
598 // Allocate length items for replacementIndexes,
599 // to store the index in the strings for the replacement variant.
600 void readVariantAlias(UResourceBundle* alias,
601 UniqueCharStrings* strings,
602 LocalMemory<const char*>& types,
603 LocalMemory<int32_t>& replacementIndexes,
604 int32_t &length, UErrorCode &status);
605
606 // Read the subdivisionAlias data from alias to
607 // strings+types+replacementIndexes
608 // Allocate length items for types, to store the type field.
609 // Allocate length items for replacementIndexes,
610 // to store the index in the strings for the replacement variant.
611 void readSubdivisionAlias(UResourceBundle* alias,
612 UniqueCharStrings* strings,
613 LocalMemory<const char*>& types,
614 LocalMemory<int32_t>& replacementIndexes,
615 int32_t &length, UErrorCode &status);
616};
617
618/**
619 * A class to hold the Alias Data.
620 */
621class AliasData : public UMemory {
622public:
623 static const AliasData* singleton(UErrorCode& status) {
624 if (U_FAILURE(status)) {
625 // Do not get into loadData if the status already has error.
626 return nullptr;
627 }
628 umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status);
629 return gSingleton;
630 }
631
632 const CharStringMap& languageMap() const { return language; }
633 const CharStringMap& scriptMap() const { return script; }
634 const CharStringMap& territoryMap() const { return territory; }
635 const CharStringMap& variantMap() const { return variant; }
636 const CharStringMap& subdivisionMap() const { return subdivision; }
637
638 static void U_CALLCONV loadData(UErrorCode &status);
639 static UBool U_CALLCONV cleanup();
640
641 static UInitOnce gInitOnce;
642
643private:
644 AliasData(CharStringMap languageMap,
645 CharStringMap scriptMap,
646 CharStringMap territoryMap,
647 CharStringMap variantMap,
648 CharStringMap subdivisionMap,
649 CharString* strings)
650 : language(std::move(languageMap)),
651 script(std::move(scriptMap)),
652 territory(std::move(territoryMap)),
653 variant(std::move(variantMap)),
654 subdivision(std::move(subdivisionMap)),
655 strings(strings) {
656 }
657
658 ~AliasData() {
659 delete strings;
660 }
661
662 static const AliasData* gSingleton;
663
664 CharStringMap language;
665 CharStringMap script;
666 CharStringMap territory;
667 CharStringMap variant;
668 CharStringMap subdivision;
669 CharString* strings;
670
671 friend class AliasDataBuilder;
672};
673
674
675const AliasData* AliasData::gSingleton = nullptr;
676UInitOnce AliasData::gInitOnce {};
677
678UBool U_CALLCONV
679AliasData::cleanup()
680{
681 gInitOnce.reset();
682 delete gSingleton;
683 return true;
684}
685
686void
687AliasDataBuilder::readAlias(
688 UResourceBundle* alias,
689 UniqueCharStrings* strings,
690 LocalMemory<const char*>& types,
691 LocalMemory<int32_t>& replacementIndexes,
692 int32_t &length,
693 void (*checkType)(const char* type),
694 void (*checkReplacement)(const UChar* replacement),
695 UErrorCode &status) {
696 if (U_FAILURE(status)) {
697 return;
698 }
699 length = ures_getSizeures_getSize_77(alias);
700 const char** rawTypes = types.allocateInsteadAndCopy(length);
701 if (rawTypes == nullptr) {
702 status = U_MEMORY_ALLOCATION_ERROR;
703 return;
704 }
705 int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length);
706 if (rawIndexes == nullptr) {
707 status = U_MEMORY_ALLOCATION_ERROR;
708 return;
709 }
710 for (int i = 0; U_SUCCESS(status) && ures_hasNextures_hasNext_77(alias); i++) {
711 LocalUResourceBundlePointer res(
712 ures_getNextResourceures_getNextResource_77(alias, nullptr, &status));
713 const char* aliasFrom = ures_getKeyures_getKey_77(res.getAlias());
714 const UChar* aliasTo =
715 ures_getStringByKeyures_getStringByKey_77(res.getAlias(), "replacement", nullptr, &status);
716 if (U_FAILURE(status)) return;
717
718 checkType(aliasFrom);
719 checkReplacement(aliasTo);
720
721 rawTypes[i] = aliasFrom;
722 rawIndexes[i] = strings->add(aliasTo, status);
723 }
724}
725
726/**
727 * Read the languageAlias data from alias to strings+types+replacementIndexes.
728 * Allocate length items for types, to store the type field. Allocate length
729 * items for replacementIndexes, to store the index in the strings for the
730 * replacement language.
731 */
732void
733AliasDataBuilder::readLanguageAlias(
734 UResourceBundle* alias,
735 UniqueCharStrings* strings,
736 LocalMemory<const char*>& types,
737 LocalMemory<int32_t>& replacementIndexes,
738 int32_t &length,
739 UErrorCode &status)
740{
741 return readAlias(
742 alias, strings, types, replacementIndexes, length,
743#if U_DEBUG1
744 [](const char* type) {
745 // Assert the aliasFrom only contains the following possibilities
746 // language_REGION_variant
747 // language_REGION
748 // language_variant
749 // language
750 // und_variant
751 Locale test(type);
752 // Assert no script in aliasFrom
753 U_ASSERT(test.getScript()[0] == '\0')(static_cast <bool> (test.getScript()[0] == '\0') ? void
(0) : __assert_fail ("test.getScript()[0] == '\\0'", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
754 // Assert when language is und, no REGION in aliasFrom.
755 U_ASSERT(test.getLanguage()[0] != '\0' || test.getCountry()[0] == '\0')(static_cast <bool> (test.getLanguage()[0] != '\0' || test
.getCountry()[0] == '\0') ? void (0) : __assert_fail ("test.getLanguage()[0] != '\\0' || test.getCountry()[0] == '\\0'"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
756 },
757#else
758 [](const char*) {},
759#endif
760 [](const UChar*) {}, status);
761}
762
763/**
764 * Read the scriptAlias data from alias to strings+types+replacementIndexes.
765 * Allocate length items for types, to store the type field. Allocate length
766 * items for replacementIndexes, to store the index in the strings for the
767 * replacement script.
768 */
769void
770AliasDataBuilder::readScriptAlias(
771 UResourceBundle* alias,
772 UniqueCharStrings* strings,
773 LocalMemory<const char*>& types,
774 LocalMemory<int32_t>& replacementIndexes,
775 int32_t &length,
776 UErrorCode &status)
777{
778 return readAlias(
779 alias, strings, types, replacementIndexes, length,
780#if U_DEBUG1
781 [](const char* type) {
782 U_ASSERT(uprv_strlen(type) == 4)(static_cast <bool> (:: strlen(type) == 4) ? void (0) :
__assert_fail (":: strlen(type) == 4", __builtin_FILE (), __builtin_LINE
(), __extension__ __PRETTY_FUNCTION__))
;
783 },
784 [](const UChar* replacement) {
785 U_ASSERT(u_strlen(replacement) == 4)(static_cast <bool> (u_strlen_77(replacement) == 4) ? void
(0) : __assert_fail ("u_strlen_77(replacement) == 4", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
786 },
787#else
788 [](const char*) {},
789 [](const UChar*) { },
790#endif
791 status);
792}
793
794/**
795 * Read the territoryAlias data from alias to strings+types+replacementIndexes.
796 * Allocate length items for types, to store the type field. Allocate length
797 * items for replacementIndexes, to store the index in the strings for the
798 * replacement regions.
799 */
800void
801AliasDataBuilder::readTerritoryAlias(
802 UResourceBundle* alias,
803 UniqueCharStrings* strings,
804 LocalMemory<const char*>& types,
805 LocalMemory<int32_t>& replacementIndexes,
806 int32_t &length,
807 UErrorCode &status)
808{
809 return readAlias(
810 alias, strings, types, replacementIndexes, length,
811#if U_DEBUG1
812 [](const char* type) {
813 U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3)(static_cast <bool> (:: strlen(type) == 2 || :: strlen(
type) == 3) ? void (0) : __assert_fail (":: strlen(type) == 2 || :: strlen(type) == 3"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
814 },
815#else
816 [](const char*) {},
817#endif
818 [](const UChar*) { },
819 status);
820}
821
822/**
823 * Read the variantAlias data from alias to strings+types+replacementIndexes.
824 * Allocate length items for types, to store the type field. Allocate length
825 * items for replacementIndexes, to store the index in the strings for the
826 * replacement variant.
827 */
828void
829AliasDataBuilder::readVariantAlias(
830 UResourceBundle* alias,
831 UniqueCharStrings* strings,
832 LocalMemory<const char*>& types,
833 LocalMemory<int32_t>& replacementIndexes,
834 int32_t &length,
835 UErrorCode &status)
836{
837 return readAlias(
838 alias, strings, types, replacementIndexes, length,
839#if U_DEBUG1
840 [](const char* type) {
841 U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8)(static_cast <bool> (:: strlen(type) >= 4 &&
:: strlen(type) <= 8) ? void (0) : __assert_fail (":: strlen(type) >= 4 && :: strlen(type) <= 8"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
842 U_ASSERT(uprv_strlen(type) != 4 ||(static_cast <bool> (:: strlen(type) != 4 || (type[0] >=
'0' && type[0] <= '9')) ? void (0) : __assert_fail
(":: strlen(type) != 4 || (type[0] >= '0' && type[0] <= '9')"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
843 (type[0] >= '0' && type[0] <= '9'))(static_cast <bool> (:: strlen(type) != 4 || (type[0] >=
'0' && type[0] <= '9')) ? void (0) : __assert_fail
(":: strlen(type) != 4 || (type[0] >= '0' && type[0] <= '9')"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
844 },
845 [](const UChar* replacement) {
846 int32_t len = u_strlenu_strlen_77(replacement);
847 U_ASSERT(len >= 4 && len <= 8)(static_cast <bool> (len >= 4 && len <= 8
) ? void (0) : __assert_fail ("len >= 4 && len <= 8"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
848 U_ASSERT(len != 4 ||(static_cast <bool> (len != 4 || (*replacement >= u'0'
&& *replacement <= u'9')) ? void (0) : __assert_fail
("len != 4 || (*replacement >= u'0' && *replacement <= u'9')"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
849 (*replacement >= u'0' &&(static_cast <bool> (len != 4 || (*replacement >= u'0'
&& *replacement <= u'9')) ? void (0) : __assert_fail
("len != 4 || (*replacement >= u'0' && *replacement <= u'9')"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
850 *replacement <= u'9'))(static_cast <bool> (len != 4 || (*replacement >= u'0'
&& *replacement <= u'9')) ? void (0) : __assert_fail
("len != 4 || (*replacement >= u'0' && *replacement <= u'9')"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
851 },
852#else
853 [](const char*) {},
854 [](const UChar*) { },
855#endif
856 status);
857}
858
859/**
860 * Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
861 * Allocate length items for types, to store the type field. Allocate length
862 * items for replacementIndexes, to store the index in the strings for the
863 * replacement regions.
864 */
865void
866AliasDataBuilder::readSubdivisionAlias(
867 UResourceBundle* alias,
868 UniqueCharStrings* strings,
869 LocalMemory<const char*>& types,
870 LocalMemory<int32_t>& replacementIndexes,
871 int32_t &length,
872 UErrorCode &status)
873{
874 return readAlias(
875 alias, strings, types, replacementIndexes, length,
876#if U_DEBUG1
877 [](const char* type) {
878 U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8)(static_cast <bool> (:: strlen(type) >= 3 &&
:: strlen(type) <= 8) ? void (0) : __assert_fail (":: strlen(type) >= 3 && :: strlen(type) <= 8"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
879 },
880#else
881 [](const char*) {},
882#endif
883 [](const UChar*) { },
884 status);
885}
886
887/**
888 * Initializes the alias data from the ICU resource bundles. The alias data
889 * contains alias of language, country, script and variants.
890 *
891 * If the alias data has already loaded, then this method simply returns without
892 * doing anything meaningful.
893 */
894void U_CALLCONV
895AliasData::loadData(UErrorCode &status)
896{
897#ifdef LOCALE_CANONICALIZATION_DEBUG
898 UDate start = uprv_getRawUTCtimeuprv_getRawUTCtime_77();
899#endif // LOCALE_CANONICALIZATION_DEBUG
900 ucln_common_registerCleanupucln_common_registerCleanup_77(UCLN_COMMON_LOCALE_ALIAS, cleanup);
901 AliasDataBuilder builder;
902 gSingleton = builder.build(status);
903#ifdef LOCALE_CANONICALIZATION_DEBUG
904 UDate end = uprv_getRawUTCtimeuprv_getRawUTCtime_77();
905 printf("AliasData::loadData took total %f ms\n", end - start);
906#endif // LOCALE_CANONICALIZATION_DEBUG
907}
908
909/**
910 * Build the alias data from resources.
911 */
912AliasData*
913AliasDataBuilder::build(UErrorCode &status) {
914 if (U_FAILURE(status)) { return nullptr; }
915
916 LocalUResourceBundlePointer metadata(
917 ures_openDirectures_openDirect_77(nullptr, "metadata", &status));
918 LocalUResourceBundlePointer metadataAlias(
919 ures_getByKeyures_getByKey_77(metadata.getAlias(), "alias", nullptr, &status));
920 LocalUResourceBundlePointer languageAlias(
921 ures_getByKeyures_getByKey_77(metadataAlias.getAlias(), "language", nullptr, &status));
922 LocalUResourceBundlePointer scriptAlias(
923 ures_getByKeyures_getByKey_77(metadataAlias.getAlias(), "script", nullptr, &status));
924 LocalUResourceBundlePointer territoryAlias(
925 ures_getByKeyures_getByKey_77(metadataAlias.getAlias(), "territory", nullptr, &status));
926 LocalUResourceBundlePointer variantAlias(
927 ures_getByKeyures_getByKey_77(metadataAlias.getAlias(), "variant", nullptr, &status));
928 LocalUResourceBundlePointer subdivisionAlias(
929 ures_getByKeyures_getByKey_77(metadataAlias.getAlias(), "subdivision", nullptr, &status));
930
931 if (U_FAILURE(status)) {
932 return nullptr;
933 }
934 int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
935 variantLength = 0, subdivisionLength = 0;
936
937 // Read the languageAlias into languageTypes, languageReplacementIndexes
938 // and strings
939 UniqueCharStrings strings(status);
940 LocalMemory<const char*> languageTypes;
941 LocalMemory<int32_t> languageReplacementIndexes;
942 readLanguageAlias(languageAlias.getAlias(),
943 &strings,
944 languageTypes,
945 languageReplacementIndexes,
946 languagesLength,
947 status);
948
949 // Read the scriptAlias into scriptTypes, scriptReplacementIndexes
950 // and strings
951 LocalMemory<const char*> scriptTypes;
952 LocalMemory<int32_t> scriptReplacementIndexes;
953 readScriptAlias(scriptAlias.getAlias(),
954 &strings,
955 scriptTypes,
956 scriptReplacementIndexes,
957 scriptLength,
958 status);
959
960 // Read the territoryAlias into territoryTypes, territoryReplacementIndexes
961 // and strings
962 LocalMemory<const char*> territoryTypes;
963 LocalMemory<int32_t> territoryReplacementIndexes;
964 readTerritoryAlias(territoryAlias.getAlias(),
965 &strings,
966 territoryTypes,
967 territoryReplacementIndexes,
968 territoryLength, status);
969
970 // Read the variantAlias into variantTypes, variantReplacementIndexes
971 // and strings
972 LocalMemory<const char*> variantTypes;
973 LocalMemory<int32_t> variantReplacementIndexes;
974 readVariantAlias(variantAlias.getAlias(),
975 &strings,
976 variantTypes,
977 variantReplacementIndexes,
978 variantLength, status);
979
980 // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
981 // and strings
982 LocalMemory<const char*> subdivisionTypes;
983 LocalMemory<int32_t> subdivisionReplacementIndexes;
984 readSubdivisionAlias(subdivisionAlias.getAlias(),
985 &strings,
986 subdivisionTypes,
987 subdivisionReplacementIndexes,
988 subdivisionLength, status);
989
990 if (U_FAILURE(status)) {
991 return nullptr;
992 }
993
994 // We can only use strings after freeze it.
995 strings.freeze();
996
997 // Build the languageMap from languageTypes & languageReplacementIndexes
998 CharStringMap languageMap(490, status);
999 for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) {
1000 languageMap.put(languageTypes[i],
1001 strings.get(languageReplacementIndexes[i]),
1002 status);
1003 }
1004
1005 // Build the scriptMap from scriptTypes & scriptReplacementIndexes
1006 CharStringMap scriptMap(1, status);
1007 for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) {
1008 scriptMap.put(scriptTypes[i],
1009 strings.get(scriptReplacementIndexes[i]),
1010 status);
1011 }
1012
1013 // Build the territoryMap from territoryTypes & territoryReplacementIndexes
1014 CharStringMap territoryMap(650, status);
1015 for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) {
1016 territoryMap.put(territoryTypes[i],
1017 strings.get(territoryReplacementIndexes[i]),
1018 status);
1019 }
1020
1021 // Build the variantMap from variantTypes & variantReplacementIndexes.
1022 CharStringMap variantMap(2, status);
1023 for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) {
1024 variantMap.put(variantTypes[i],
1025 strings.get(variantReplacementIndexes[i]),
1026 status);
1027 }
1028
1029 // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
1030 CharStringMap subdivisionMap(2, status);
1031 for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
1032 subdivisionMap.put(subdivisionTypes[i],
1033 strings.get(subdivisionReplacementIndexes[i]),
1034 status);
1035 }
1036
1037 if (U_FAILURE(status)) {
1038 return nullptr;
1039 }
1040
1041 // copy hashtables
1042 auto *data = new AliasData(
1043 std::move(languageMap),
1044 std::move(scriptMap),
1045 std::move(territoryMap),
1046 std::move(variantMap),
1047 std::move(subdivisionMap),
1048 strings.orphanCharStrings());
1049
1050 if (data == nullptr) {
1051 status = U_MEMORY_ALLOCATION_ERROR;
1052 }
1053 return data;
1054}
1055
1056/**
1057 * A class that find the replacement values of locale fields by using AliasData.
1058 */
1059class AliasReplacer {
1060public:
1061 AliasReplacer(UErrorCode& status) :
1062 language(nullptr), script(nullptr), region(nullptr),
1063 extensions(nullptr),
1064 // store value in variants only once
1065 variants(nullptr,
1066 ([](UElement e1, UElement e2) -> UBool {
1067 return 0==uprv_strcmp((const char*)e1.pointer,:: strcmp((const char*)e1.pointer, (const char*)e2.pointer)
1068 (const char*)e2.pointer):: strcmp((const char*)e1.pointer, (const char*)e2.pointer);}),
1069 status),
1070 data(nullptr) {
1071 }
1072 ~AliasReplacer() {
1073 }
1074
1075 // Check the fields inside locale, if need to replace fields,
1076 // place the the replaced locale ID in out and return true.
1077 // Otherwise return false for no replacement or error.
1078 bool replace(
1079 const Locale& locale, CharString& out, UErrorCode& status);
1080
1081private:
1082 const char* language;
1083 const char* script;
1084 const char* region;
1085 const char* extensions;
1086 UVector variants;
1087
1088 const AliasData* data;
1089
1090 inline bool notEmpty(const char* str) {
1091 return str && str[0] != NULL_CHAR'\0';
1092 }
1093
1094 /**
1095 * If replacement is neither null nor empty and input is either null or empty,
1096 * return replacement.
1097 * If replacement is neither null nor empty but input is not empty, return input.
1098 * If replacement is either null or empty and type is either null or empty,
1099 * return input.
1100 * Otherwise return null.
1101 * replacement input type return
1102 * AAA nullptr * AAA
1103 * AAA BBB * BBB
1104 * nullptr || "" CCC nullptr CCC
1105 * nullptr || "" * DDD nullptr
1106 */
1107 inline const char* deleteOrReplace(
1108 const char* input, const char* type, const char* replacement) {
1109 return notEmpty(replacement) ?
1110 ((input == nullptr) ? replacement : input) :
1111 ((type == nullptr) ? input : nullptr);
1112 }
1113
1114 inline bool same(const char* a, const char* b) {
1115 if (a == nullptr && b == nullptr) {
1116 return true;
1117 }
1118 if ((a == nullptr && b != nullptr) ||
1119 (a != nullptr && b == nullptr)) {
1120 return false;
1121 }
1122 return uprv_strcmp(a, b):: strcmp(a, b) == 0;
1123 }
1124
1125 // Gather fields and generate locale ID into out.
1126 CharString& outputToString(CharString& out, UErrorCode& status);
1127
1128 // Generate the lookup key.
1129 CharString& generateKey(const char* language, const char* region,
1130 const char* variant, CharString& out,
1131 UErrorCode& status);
1132
1133 void parseLanguageReplacement(const char* replacement,
1134 const char*& replaceLanguage,
1135 const char*& replaceScript,
1136 const char*& replaceRegion,
1137 const char*& replaceVariant,
1138 const char*& replaceExtensions,
1139 UVector& toBeFreed,
1140 UErrorCode& status);
1141
1142 // Replace by using languageAlias.
1143 bool replaceLanguage(bool checkLanguage, bool checkRegion,
1144 bool checkVariants, UVector& toBeFreed,
1145 UErrorCode& status);
1146
1147 // Replace by using territoryAlias.
1148 bool replaceTerritory(UVector& toBeFreed, UErrorCode& status);
1149
1150 // Replace by using scriptAlias.
1151 bool replaceScript(UErrorCode& status);
1152
1153 // Replace by using variantAlias.
1154 bool replaceVariant(UErrorCode& status);
1155
1156 // Replace by using subdivisionAlias.
1157 bool replaceSubdivision(StringPiece subdivision,
1158 CharString& output, UErrorCode& status);
1159
1160 // Replace transformed extensions.
1161 bool replaceTransformedExtensions(
1162 CharString& transformedExtensions, CharString& output, UErrorCode& status);
1163};
1164
1165CharString&
1166AliasReplacer::generateKey(
1167 const char* language, const char* region, const char* variant,
1168 CharString& out, UErrorCode& status)
1169{
1170 if (U_FAILURE(status)) { return out; }
1171 out.append(language, status);
1172 if (notEmpty(region)) {
1173 out.append(SEP_CHAR'_', status)
1174 .append(region, status);
1175 }
1176 if (notEmpty(variant)) {
1177 out.append(SEP_CHAR'_', status)
1178 .append(variant, status);
1179 }
1180 return out;
1181}
1182
1183void
1184AliasReplacer::parseLanguageReplacement(
1185 const char* replacement,
1186 const char*& replacedLanguage,
1187 const char*& replacedScript,
1188 const char*& replacedRegion,
1189 const char*& replacedVariant,
1190 const char*& replacedExtensions,
1191 UVector& toBeFreed,
1192 UErrorCode& status)
1193{
1194 if (U_FAILURE(status)) {
1195 return;
1196 }
1197 replacedScript = replacedRegion = replacedVariant
1198 = replacedExtensions = nullptr;
1199 if (uprv_strchr(replacement, '_'):: strchr(replacement, '_') == nullptr) {
1200 replacedLanguage = replacement;
1201 // reach the end, just return it.
1202 return;
1203 }
1204 // We have multiple field so we have to allocate and parse
1205 CharString* str =
1206 new CharString(replacement, static_cast<int32_t>(uprv_strlen(replacement):: strlen(replacement)), status);
1207 LocalPointer<CharString> lpStr(str, status);
1208 toBeFreed.adoptElement(lpStr.orphan(), status);
1209 if (U_FAILURE(status)) {
1210 return;
1211 }
1212 char* data = str->data();
1213 replacedLanguage = (const char*) data;
1214 char* endOfField = uprv_strchr(data, '_'):: strchr(data, '_');
1215 *endOfField = '\0'; // null terminiate it.
1216 endOfField++;
1217 const char* start = endOfField;
1218 endOfField = const_cast<char*>(uprv_strchr(start, '_'):: strchr(start, '_'));
1219 size_t len = 0;
1220 if (endOfField == nullptr) {
1221 len = uprv_strlen(start):: strlen(start);
1222 } else {
1223 len = endOfField - start;
1224 *endOfField = '\0'; // null terminiate it.
1225 }
1226 if (len == 4 && uprv_isASCIILetteruprv_isASCIILetter_77(*start)) {
1227 // Got a script
1228 replacedScript = start;
1229 if (endOfField == nullptr) {
1230 return;
1231 }
1232 start = endOfField++;
1233 endOfField = const_cast<char*>(uprv_strchr(start, '_'):: strchr(start, '_'));
1234 if (endOfField == nullptr) {
1235 len = uprv_strlen(start):: strlen(start);
1236 } else {
1237 len = endOfField - start;
1238 *endOfField = '\0'; // null terminiate it.
1239 }
1240 }
1241 if (len >= 2 && len <= 3) {
1242 // Got a region
1243 replacedRegion = start;
1244 if (endOfField == nullptr) {
1245 return;
1246 }
1247 start = endOfField++;
1248 endOfField = const_cast<char*>(uprv_strchr(start, '_'):: strchr(start, '_'));
1249 if (endOfField == nullptr) {
1250 len = uprv_strlen(start):: strlen(start);
1251 } else {
1252 len = endOfField - start;
1253 *endOfField = '\0'; // null terminiate it.
1254 }
1255 }
1256 if (len >= 4) {
1257 // Got a variant
1258 replacedVariant = start;
1259 if (endOfField == nullptr) {
1260 return;
1261 }
1262 start = endOfField++;
1263 }
1264 replacedExtensions = start;
1265}
1266
1267bool
1268AliasReplacer::replaceLanguage(
1269 bool checkLanguage, bool checkRegion,
1270 bool checkVariants, UVector& toBeFreed, UErrorCode& status)
1271{
1272 if (U_FAILURE(status)) {
1273 return false;
1274 }
1275 if ( (checkRegion && region == nullptr) ||
1276 (checkVariants && variants.size() == 0)) {
1277 // Nothing to search.
1278 return false;
1279 }
1280 int32_t variant_size = checkVariants ? variants.size() : 1;
1281 // Since we may have more than one variant, we need to loop through them.
1282 const char* searchLanguage = checkLanguage ? language : "und";
1283 const char* searchRegion = checkRegion ? region : nullptr;
1284 const char* searchVariant = nullptr;
1285 for (int32_t variant_index = 0;
1286 variant_index < variant_size;
1287 variant_index++) {
1288 if (checkVariants) {
1289 U_ASSERT(variant_index < variant_size)(static_cast <bool> (variant_index < variant_size) ?
void (0) : __assert_fail ("variant_index < variant_size",
__builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
1290 searchVariant = static_cast<const char*>(variants.elementAt(variant_index));
1291 }
1292
1293 if (searchVariant != nullptr && uprv_strlen(searchVariant):: strlen(searchVariant) < 4) {
1294 // Do not consider ill-formed variant subtag.
1295 searchVariant = nullptr;
1296 }
1297 CharString typeKey;
1298 generateKey(searchLanguage, searchRegion, searchVariant, typeKey,
1299 status);
1300 if (U_FAILURE(status)) {
1301 return false;
1302 }
1303 const char *replacement = data->languageMap().get(typeKey.data());
1304 if (replacement == nullptr) {
1305 // Found no replacement data.
1306 continue;
1307 }
1308
1309 const char* replacedLanguage = nullptr;
1310 const char* replacedScript = nullptr;
1311 const char* replacedRegion = nullptr;
1312 const char* replacedVariant = nullptr;
1313 const char* replacedExtensions = nullptr;
1314 parseLanguageReplacement(replacement,
1315 replacedLanguage,
1316 replacedScript,
1317 replacedRegion,
1318 replacedVariant,
1319 replacedExtensions,
1320 toBeFreed,
1321 status);
1322 replacedLanguage =
1323 (replacedLanguage != nullptr && uprv_strcmp(replacedLanguage, "und"):: strcmp(replacedLanguage, "und") == 0) ?
1324 language : replacedLanguage;
1325 replacedScript = deleteOrReplace(script, nullptr, replacedScript);
1326 replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
1327 replacedVariant = deleteOrReplace(
1328 searchVariant, searchVariant, replacedVariant);
1329
1330 if ( same(language, replacedLanguage) &&
1331 same(script, replacedScript) &&
1332 same(region, replacedRegion) &&
1333 same(searchVariant, replacedVariant) &&
1334 replacedExtensions == nullptr) {
1335 // Replacement produce no changes.
1336 continue;
1337 }
1338
1339 language = replacedLanguage;
1340 region = replacedRegion;
1341 script = replacedScript;
1342 if (searchVariant != nullptr) {
1343 if (notEmpty(replacedVariant)) {
1344 variants.setElementAt((void*)replacedVariant, variant_index);
1345 } else {
1346 variants.removeElementAt(variant_index);
1347 }
1348 }
1349 if (replacedExtensions != nullptr) {
1350 // DO NOTHING
1351 // UTS35 does not specify what should we do if we have extensions in the
1352 // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
1353 // extensions in them languageAlias:
1354 // i_default => en_x_i_default
1355 // i_enochian => und_x_i_enochian
1356 // i_mingo => see_x_i_mingo
1357 // zh_min => nan_x_zh_min
1358 // But all of them are already changed by code inside ultag_parse() before
1359 // hitting this code.
1360 }
1361
1362 // Something changed by language alias data.
1363 return true;
1364 }
1365 // Nothing changed by language alias data.
1366 return false;
1367}
1368
1369bool
1370AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
1371{
1372 if (U_FAILURE(status)) {
1373 return false;
1374 }
1375 if (region == nullptr) {
1376 // No region to search.
1377 return false;
1378 }
1379 const char *replacement = data->territoryMap().get(region);
1380 if (replacement == nullptr) {
1381 // Found no replacement data for this region.
1382 return false;
1383 }
1384 const char* replacedRegion = replacement;
1385 const char* firstSpace = uprv_strchr(replacement, ' '):: strchr(replacement, ' ');
1386 if (firstSpace != nullptr) {
1387 // If there are are more than one region in the replacement.
1388 // We need to check which one match based on the language.
1389 // Cannot use nullptr for language because that will construct
1390 // the default locale, in that case, use "und" to get the correct
1391 // locale.
1392 Locale l = LocaleBuilder()
1393 .setLanguage(language == nullptr ? "und" : language)
1394 .setScript(script)
1395 .build(status);
1396 l.addLikelySubtags(status);
1397 const char* likelyRegion = l.getCountry();
1398 LocalPointer<CharString> item;
1399 if (likelyRegion != nullptr && uprv_strlen(likelyRegion):: strlen(likelyRegion) > 0) {
1400 size_t len = uprv_strlen(likelyRegion):: strlen(likelyRegion);
1401 const char* foundInReplacement = uprv_strstr(replacement,:: strstr(replacement, likelyRegion)
1402 likelyRegion):: strstr(replacement, likelyRegion);
1403 if (foundInReplacement != nullptr) {
1404 // Assuming the case there are no three letter region code in
1405 // the replacement of territoryAlias
1406 U_ASSERT(foundInReplacement == replacement ||(static_cast <bool> (foundInReplacement == replacement ||
*(foundInReplacement-1) == ' ') ? void (0) : __assert_fail (
"foundInReplacement == replacement || *(foundInReplacement-1) == ' '"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
1407 *(foundInReplacement-1) == ' ')(static_cast <bool> (foundInReplacement == replacement ||
*(foundInReplacement-1) == ' ') ? void (0) : __assert_fail (
"foundInReplacement == replacement || *(foundInReplacement-1) == ' '"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
1408 U_ASSERT(foundInReplacement[len] == ' ' ||(static_cast <bool> (foundInReplacement[len] == ' ' || foundInReplacement
[len] == '\0') ? void (0) : __assert_fail ("foundInReplacement[len] == ' ' || foundInReplacement[len] == '\\0'"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
1409 foundInReplacement[len] == '\0')(static_cast <bool> (foundInReplacement[len] == ' ' || foundInReplacement
[len] == '\0') ? void (0) : __assert_fail ("foundInReplacement[len] == ' ' || foundInReplacement[len] == '\\0'"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
1410 item.adoptInsteadAndCheckErrorCode(
1411 new CharString(foundInReplacement, static_cast<int32_t>(len), status), status);
1412 }
1413 }
1414 if (item.isNull() && U_SUCCESS(status)) {
1415 item.adoptInsteadAndCheckErrorCode(
1416 new CharString(replacement,
1417 static_cast<int32_t>(firstSpace - replacement), status), status);
1418 }
1419 if (U_FAILURE(status)) { return false; }
1420 replacedRegion = item->data();
1421 toBeFreed.adoptElement(item.orphan(), status);
1422 if (U_FAILURE(status)) { return false; }
1423 }
1424 U_ASSERT(!same(region, replacedRegion))(static_cast <bool> (!same(region, replacedRegion)) ? void
(0) : __assert_fail ("!same(region, replacedRegion)", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
1425 region = replacedRegion;
1426 // The region is changed by data in territory alias.
1427 return true;
1428}
1429
1430bool
1431AliasReplacer::replaceScript(UErrorCode& status)
1432{
1433 if (U_FAILURE(status)) {
1434 return false;
1435 }
1436 if (script == nullptr) {
1437 // No script to search.
1438 return false;
1439 }
1440 const char *replacement = data->scriptMap().get(script);
1441 if (replacement == nullptr) {
1442 // Found no replacement data for this script.
1443 return false;
1444 }
1445 U_ASSERT(!same(script, replacement))(static_cast <bool> (!same(script, replacement)) ? void
(0) : __assert_fail ("!same(script, replacement)", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
1446 script = replacement;
1447 // The script is changed by data in script alias.
1448 return true;
1449}
1450
1451bool
1452AliasReplacer::replaceVariant(UErrorCode& status)
1453{
1454 if (U_FAILURE(status)) {
1455 return false;
1456 }
1457 // Since we may have more than one variant, we need to loop through them.
1458 for (int32_t i = 0; i < variants.size(); i++) {
1459 const char* variant = static_cast<const char*>(variants.elementAt(i));
1460 const char *replacement = data->variantMap().get(variant);
1461 if (replacement == nullptr) {
1462 // Found no replacement data for this variant.
1463 continue;
1464 }
1465 U_ASSERT((uprv_strlen(replacement) >= 5 &&(static_cast <bool> ((:: strlen(replacement) >= 5 &&
:: strlen(replacement) <= 8) || (:: strlen(replacement) ==
4 && replacement[0] >= '0' && replacement
[0] <= '9')) ? void (0) : __assert_fail ("(:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) || (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement[0] <= '9')"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
1466 uprv_strlen(replacement) <= 8) ||(static_cast <bool> ((:: strlen(replacement) >= 5 &&
:: strlen(replacement) <= 8) || (:: strlen(replacement) ==
4 && replacement[0] >= '0' && replacement
[0] <= '9')) ? void (0) : __assert_fail ("(:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) || (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement[0] <= '9')"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
1467 (uprv_strlen(replacement) == 4 &&(static_cast <bool> ((:: strlen(replacement) >= 5 &&
:: strlen(replacement) <= 8) || (:: strlen(replacement) ==
4 && replacement[0] >= '0' && replacement
[0] <= '9')) ? void (0) : __assert_fail ("(:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) || (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement[0] <= '9')"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
1468 replacement[0] >= '0' &&(static_cast <bool> ((:: strlen(replacement) >= 5 &&
:: strlen(replacement) <= 8) || (:: strlen(replacement) ==
4 && replacement[0] >= '0' && replacement
[0] <= '9')) ? void (0) : __assert_fail ("(:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) || (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement[0] <= '9')"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
1469 replacement[0] <= '9'))(static_cast <bool> ((:: strlen(replacement) >= 5 &&
:: strlen(replacement) <= 8) || (:: strlen(replacement) ==
4 && replacement[0] >= '0' && replacement
[0] <= '9')) ? void (0) : __assert_fail ("(:: strlen(replacement) >= 5 && :: strlen(replacement) <= 8) || (:: strlen(replacement) == 4 && replacement[0] >= '0' && replacement[0] <= '9')"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
1470 if (!same(variant, replacement)) {
1471 variants.setElementAt((void*)replacement, i);
1472 // Special hack to handle hepburn-heploc => alalc97
1473 if (uprv_strcmp(variant, "heploc"):: strcmp(variant, "heploc") == 0) {
1474 for (int32_t j = 0; j < variants.size(); j++) {
1475 if (uprv_strcmp((const char*)(variants.elementAt(j)),:: strcmp((const char*)(variants.elementAt(j)), "hepburn")
1476 "hepburn"):: strcmp((const char*)(variants.elementAt(j)), "hepburn") == 0) {
1477 variants.removeElementAt(j);
1478 }
1479 }
1480 }
1481 return true;
1482 }
1483 }
1484 return false;
1485}
1486
1487bool
1488AliasReplacer::replaceSubdivision(
1489 StringPiece subdivision, CharString& output, UErrorCode& status)
1490{
1491 if (U_FAILURE(status)) {
1492 return false;
1493 }
1494 const char *replacement = data->subdivisionMap().get(subdivision.data());
1495 if (replacement != nullptr) {
1496 const char* firstSpace = uprv_strchr(replacement, ' '):: strchr(replacement, ' ');
1497 // Found replacement data for this subdivision.
1498 size_t len = (firstSpace != nullptr) ?
1499 (firstSpace - replacement) : uprv_strlen(replacement):: strlen(replacement);
1500 if (2 <= len && len <= 8) {
1501 output.append(replacement, static_cast<int32_t>(len), status);
1502 if (2 == len) {
1503 // Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
1504 output.append("zzzz", 4, status);
1505 }
1506 }
1507 return true;
1508 }
1509 return false;
1510}
1511
1512bool
1513AliasReplacer::replaceTransformedExtensions(
1514 CharString& transformedExtensions, CharString& output, UErrorCode& status)
1515{
1516 // The content of the transformedExtensions will be modified in this
1517 // function to NUL-terminating (tkey-tvalue) pairs.
1518 if (U_FAILURE(status)) {
1519 return false;
1520 }
1521 int32_t len = transformedExtensions.length();
1522 const char* str = transformedExtensions.data();
1523 const char* tkey = ultag_getTKeyStartultag_getTKeyStart_77(str);
1524 int32_t tlangLen = (tkey == str) ? 0 :
1525 ((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
1526 if (tlangLen > 0) {
1527 Locale tlang = LocaleBuilder()
1528 .setLanguageTag(StringPiece(str, tlangLen))
1529 .build(status);
1530 tlang.canonicalize(status);
1531 output = tlang.toLanguageTag<CharString>(status);
1532 if (U_FAILURE(status)) {
1533 return false;
1534 }
1535 T_CString_toLowerCaseT_CString_toLowerCase_77(output.data());
1536 }
1537 if (tkey != nullptr) {
1538 // We need to sort the tfields by tkey
1539 UVector tfields(status);
1540 if (U_FAILURE(status)) {
1541 return false;
1542 }
1543 do {
1544 const char* tvalue = uprv_strchr(tkey, '-'):: strchr(tkey, '-');
1545 if (tvalue == nullptr) {
1546 status = U_ILLEGAL_ARGUMENT_ERROR;
1547 return false;
1548 }
1549 const char* nextTKey = ultag_getTKeyStartultag_getTKeyStart_77(tvalue);
1550 if (nextTKey != nullptr) {
1551 *const_cast<char*>(nextTKey - 1) = '\0'; // NUL terminate tvalue
1552 }
1553 tfields.insertElementAt((void*)tkey, tfields.size(), status);
1554 if (U_FAILURE(status)) {
1555 return false;
1556 }
1557 tkey = nextTKey;
1558 } while (tkey != nullptr);
1559 tfields.sort([](UElement e1, UElement e2) -> int32_t {
1560 return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer):: strcmp((const char*)e1.pointer, (const char*)e2.pointer);
1561 }, status);
1562 for (int32_t i = 0; i < tfields.size(); i++) {
1563 if (output.length() > 0) {
1564 output.append('-', status);
1565 }
1566 const char* tfield = static_cast<const char*>(tfields.elementAt(i));
1567 const char* tvalue = uprv_strchr(tfield, '-'):: strchr(tfield, '-');
1568 if (tvalue == nullptr) {
1569 status = U_ILLEGAL_ARGUMENT_ERROR;
1570 return false;
1571 }
1572 // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
1573 *const_cast<char*>(tvalue++) = '\0'; // NUL terminate tkey
1574 output.append(tfield, status).append('-', status);
1575 std::optional<std::string_view> bcpTValue = ulocimp_toBcpTypeulocimp_toBcpType_77(tfield, tvalue);
1576 output.append(bcpTValue.has_value() ? *bcpTValue : tvalue, status);
1577 }
1578 }
1579 if (U_FAILURE(status)) {
1580 return false;
1581 }
1582 return true;
1583}
1584
1585CharString&
1586AliasReplacer::outputToString(
1587 CharString& out, UErrorCode& status)
1588{
1589 if (U_FAILURE(status)) { return out; }
1590 out.append(language, status);
1591 if (notEmpty(script)) {
1592 out.append(SEP_CHAR'_', status)
1593 .append(script, status);
1594 }
1595 if (notEmpty(region)) {
1596 out.append(SEP_CHAR'_', status)
1597 .append(region, status);
1598 }
1599 if (variants.size() > 0) {
1600 if (!notEmpty(script) && !notEmpty(region)) {
1601 out.append(SEP_CHAR'_', status);
1602 }
1603 variants.sort([](UElement e1, UElement e2) -> int32_t {
1604 return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer):: strcmp((const char*)e1.pointer, (const char*)e2.pointer);
1605 }, status);
1606 int32_t variantsStart = out.length();
1607 for (int32_t i = 0; i < variants.size(); i++) {
1608 out.append(SEP_CHAR'_', status)
1609 .append(static_cast<const char*>(variants.elementAt(i)),
1610 status);
1611 }
1612 T_CString_toUpperCaseT_CString_toUpperCase_77(out.data() + variantsStart);
1613 }
1614 if (notEmpty(extensions)) {
1615 CharString tmp("und_", status);
1616 tmp.append(extensions, status);
1617 Locale tmpLocale(tmp.data());
1618 // only support x extension inside CLDR for now.
1619 U_ASSERT(extensions[0] == 'x')(static_cast <bool> (extensions[0] == 'x') ? void (0) :
__assert_fail ("extensions[0] == 'x'", __builtin_FILE (), __builtin_LINE
(), __extension__ __PRETTY_FUNCTION__))
;
1620 out.append(tmpLocale.getName() + 1, status);
1621 }
1622 return out;
1623}
1624
1625bool
1626AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status)
1627{
1628 data = AliasData::singleton(status);
1629 if (U_FAILURE(status)) {
1630 return false;
1631 }
1632 U_ASSERT(data != nullptr)(static_cast <bool> (data != nullptr) ? void (0) : __assert_fail
("data != nullptr", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__))
;
1633 out.clear();
1634 language = locale.getLanguage();
1635 if (!notEmpty(language)) {
1636 language = nullptr;
1637 }
1638 script = locale.getScript();
1639 if (!notEmpty(script)) {
1640 script = nullptr;
1641 }
1642 region = locale.getCountry();
1643 if (!notEmpty(region)) {
1644 region = nullptr;
1645 }
1646 const char* variantsStr = locale.getVariant();
1647 CharString variantsBuff(variantsStr, -1, status);
1648 if (!variantsBuff.isEmpty()) {
1649 if (U_FAILURE(status)) { return false; }
1650 char* start = variantsBuff.data();
1651 T_CString_toLowerCaseT_CString_toLowerCase_77(start);
1652 char* end;
1653 while ((end = uprv_strchr(start, SEP_CHAR):: strchr(start, '_')) != nullptr &&
1654 U_SUCCESS(status)) {
1655 *end = NULL_CHAR'\0'; // null terminate inside variantsBuff
1656 // do not add "" or duplicate data to variants
1657 if (*start && !variants.contains(start)) {
1658 variants.addElement(start, status);
1659 }
1660 start = end + 1;
1661 }
1662 // do not add "" or duplicate data to variants
1663 if (*start && !variants.contains(start)) {
1664 variants.addElement(start, status);
1665 }
1666 }
1667 if (U_FAILURE(status)) { return false; }
1668
1669 // Sort the variants
1670 variants.sort([](UElement e1, UElement e2) -> int32_t {
1671 return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer):: strcmp((const char*)e1.pointer, (const char*)e2.pointer);
1672 }, status);
1673
1674 // A changed count to assert when loop too many times.
1675 int changed = 0;
1676 // A UVector to to hold CharString allocated by the replace* method
1677 // and freed when out of scope from his function.
1678 UVector stringsToBeFreed([](void *obj) { delete static_cast<CharString*>(obj); },
1679 nullptr, 10, status);
1680 while (U_SUCCESS(status)) {
1681 // Something wrong with the data cause looping here more than 10 times
1682 // already.
1683 U_ASSERT(changed < 5)(static_cast <bool> (changed < 5) ? void (0) : __assert_fail
("changed < 5", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__))
;
1684 // From observation of key in data/misc/metadata.txt
1685 // we know currently we only need to search in the following combination
1686 // of fields for type in languageAlias:
1687 // * lang_region_variant
1688 // * lang_region
1689 // * lang_variant
1690 // * lang
1691 // * und_variant
1692 // This assumption is ensured by the U_ASSERT in readLanguageAlias
1693 //
1694 // lang REGION variant
1695 if ( replaceLanguage(true, true, true, stringsToBeFreed, status) ||
1696 replaceLanguage(true, true, false, stringsToBeFreed, status) ||
1697 replaceLanguage(true, false, true, stringsToBeFreed, status) ||
1698 replaceLanguage(true, false, false, stringsToBeFreed, status) ||
1699 replaceLanguage(false,false, true, stringsToBeFreed, status) ||
1700 replaceTerritory(stringsToBeFreed, status) ||
1701 replaceScript(status) ||
1702 replaceVariant(status)) {
1703 // Some values in data is changed, try to match from the beginning
1704 // again.
1705 changed++;
1706 continue;
1707 }
1708 // Nothing changed. Break out.
1709 break;
1710 } // while(1)
1711
1712 if (U_FAILURE(status)) { return false; }
1713 // Nothing changed and we know the order of the variants are not change
1714 // because we have no variant or only one.
1715 const char* extensionsStr = locale_getKeywordsStartlocale_getKeywordsStart_77(locale.getName());
1716 if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
1717 return false;
1718 }
1719 outputToString(out, status);
1720 if (U_FAILURE(status)) {
1721 return false;
1722 }
1723 if (extensionsStr != nullptr) {
1724 changed = 0;
1725 Locale temp(locale);
1726 LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
1727 if (U_SUCCESS(status) && !iter.isNull()) {
1728 const char* key;
1729 while ((key = iter->next(nullptr, status)) != nullptr) {
1730 if (uprv_strcmp("sd", key):: strcmp("sd", key) == 0 || uprv_strcmp("rg", key):: strcmp("rg", key) == 0 ||
1731 uprv_strcmp("t", key):: strcmp("t", key) == 0) {
1732 auto value = locale.getKeywordValue<CharString>(key, status);
1733 if (U_FAILURE(status)) {
1734 status = U_ZERO_ERROR;
1735 continue;
1736 }
1737 CharString replacement;
1738 if (uprv_strlen(key):: strlen(key) == 2) {
1739 if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
1740 changed++;
1741 temp.setKeywordValue(key, replacement.data(), status);
1742 }
1743 } else {
1744 U_ASSERT(uprv_strcmp(key, "t") == 0)(static_cast <bool> (:: strcmp(key, "t") == 0) ? void (
0) : __assert_fail (":: strcmp(key, \"t\") == 0", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
1745 if (replaceTransformedExtensions(value, replacement, status)) {
1746 changed++;
1747 temp.setKeywordValue(key, replacement.data(), status);
1748 }
1749 }
1750 if (U_FAILURE(status)) {
1751 return false;
1752 }
1753 }
1754 }
1755 }
1756 if (changed != 0) {
1757 extensionsStr = locale_getKeywordsStartlocale_getKeywordsStart_77(temp.getName());
1758 }
1759 out.append(extensionsStr, status);
1760 }
1761 if (U_FAILURE(status)) {
1762 return false;
1763 }
1764 // If the tag is not changed, return.
1765 if (uprv_strcmp(out.data(), locale.getName()):: strcmp(out.data(), locale.getName()) == 0) {
1766 out.clear();
1767 return false;
1768 }
1769 return true;
1770}
1771
1772// Return true if the locale is changed during canonicalization.
1773// The replaced value then will be put into out.
1774bool
1775canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
1776{
1777 if (U_FAILURE(status)) { return false; }
1778 AliasReplacer replacer(status);
1779 return replacer.replace(locale, out, status);
1780}
1781
1782// Function to optimize for known cases without so we can skip the loading
1783// of resources in the startup time until we really need it.
1784bool
1785isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
1786{
1787 if (U_FAILURE(status)) { return false; }
1788
1789 if ( uprv_strcmp(locale, "c"):: strcmp(locale, "c") == 0 ||
1790 uprv_strcmp(locale, "en"):: strcmp(locale, "en") == 0 ||
1791 uprv_strcmp(locale, "en_US"):: strcmp(locale, "en_US") == 0) {
1792 return true;
1793 }
1794
1795 // common well-known Canonicalized.
1796 umtx_initOnce(gKnownCanonicalizedInitOnce,
1797 &loadKnownCanonicalized, status);
1798 if (U_FAILURE(status)) {
1799 return false;
1800 }
1801 U_ASSERT(gKnownCanonicalized != nullptr)(static_cast <bool> (gKnownCanonicalized != nullptr) ? void
(0) : __assert_fail ("gKnownCanonicalized != nullptr", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__))
;
1802 return uhash_getiuhash_geti_77(gKnownCanonicalized, locale) != 0;
1803}
1804
1805} // namespace
1806
1807U_NAMESPACE_END}
1808
1809// Function for testing.
1810U_EXPORT const char* const*
1811ulocimp_getKnownCanonicalizedLocaleForTestulocimp_getKnownCanonicalizedLocaleForTest_77(int32_t& length)
1812{
1813 U_NAMESPACE_USEusing namespace icu_77;
1814 length = UPRV_LENGTHOF(KNOWN_CANONICALIZED)(int32_t)(sizeof(KNOWN_CANONICALIZED)/sizeof((KNOWN_CANONICALIZED
)[0]))
;
1815 return KNOWN_CANONICALIZED;
1816}
1817
1818// Function for testing.
1819U_EXPORT bool
1820ulocimp_isCanonicalizedLocaleForTestulocimp_isCanonicalizedLocaleForTest_77(const char* localeName)
1821{
1822 U_NAMESPACE_USEusing namespace icu_77;
1823 Locale l(localeName);
1824 UErrorCode status = U_ZERO_ERROR;
1825 CharString temp;
1826 return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status);
1827}
1828
1829U_NAMESPACE_BEGINnamespace icu_77 {
1830
1831Locale& Locale::init(const char* localeID, UBool canonicalize)
1832{
1833 return localeID == nullptr ? *this = getDefault() : init(StringPiece{localeID}, canonicalize);
1834}
1835
1836/*This function initializes a Locale from a C locale ID*/
1837Locale& Locale::init(StringPiece localeID, UBool canonicalize)
1838{
1839 fIsBogus = false;
1840 /* Free our current storage */
1841 if ((baseName != fullName) && (baseName != fullNameBuffer)) {
1842 uprv_freeuprv_free_77(baseName);
1843 }
1844 baseName = nullptr;
1845 if(fullName != fullNameBuffer) {
1846 uprv_freeuprv_free_77(fullName);
1847 fullName = fullNameBuffer;
1848 }
1849
1850 // not a loop:
1851 // just an easy way to have a common error-exit
1852 // without goto and without another function
1853 do {
1854 char *separator;
1855 char *field[5] = {nullptr};
1856 int32_t fieldLen[5] = {0};
1857 int32_t fieldIdx;
1858 int32_t variantField;
1859 int32_t length;
1860 UErrorCode err;
1861
1862 /* preset all fields to empty */
1863 language[0] = script[0] = country[0] = 0;
1864
1865 const auto parse = [canonicalize](std::string_view localeID,
1866 char* name,
1867 int32_t nameCapacity,
1868 UErrorCode& status) {
1869 return ByteSinkUtil::viaByteSinkToTerminatedChars(
1870 name, nameCapacity,
1871 [&](ByteSink& sink, UErrorCode& status) {
1872 if (canonicalize) {
1873 ulocimp_canonicalizeulocimp_canonicalize_77(localeID, sink, status);
1874 } else {
1875 ulocimp_getNameulocimp_getName_77(localeID, sink, status);
1876 }
1877 },
1878 status);
1879 };
1880
1881 // "canonicalize" the locale ID to ICU/Java format
1882 err = U_ZERO_ERROR;
1883 length = parse(localeID, fullName, sizeof fullNameBuffer, err);
1884
1885 if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof(fullNameBuffer))) {
1886 U_ASSERT(baseName == nullptr)(static_cast <bool> (baseName == nullptr) ? void (0) : __assert_fail
("baseName == nullptr", __builtin_FILE (), __builtin_LINE ()
, __extension__ __PRETTY_FUNCTION__))
;
1887 /*Go to heap for the fullName if necessary*/
1888 char* newFullName = static_cast<char*>(uprv_mallocuprv_malloc_77(sizeof(char) * (length + 1)));
1889 if (newFullName == nullptr) {
1890 break; // error: out of memory
1891 }
1892 fullName = newFullName;
1893 err = U_ZERO_ERROR;
1894 length = parse(localeID, fullName, length + 1, err);
1895 }
1896 if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
1897 /* should never occur */
1898 break;
1899 }
1900
1901 variantBegin = length;
1902
1903 /* after uloc_getName/canonicalize() we know that only '_' are separators */
1904 /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
1905 separator = field[0] = fullName;
Value stored to 'separator' is never read
1906 fieldIdx = 1;
1907 char* at = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
1908 while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR):: strchr(field[fieldIdx-1], '_')) != nullptr &&
1909 fieldIdx < UPRV_LENGTHOF(field)(int32_t)(sizeof(field)/sizeof((field)[0]))-1 &&
1910 (at == nullptr || separator < at)) {
1911 field[fieldIdx] = separator + 1;
1912 fieldLen[fieldIdx - 1] = static_cast<int32_t>(separator - field[fieldIdx - 1]);
1913 fieldIdx++;
1914 }
1915 // variant may contain @foo or .foo POSIX cruft; remove it
1916 separator = uprv_strchr(field[fieldIdx-1], '@'):: strchr(field[fieldIdx-1], '@');
1917 char* sep2 = uprv_strchr(field[fieldIdx-1], '.'):: strchr(field[fieldIdx-1], '.');
1918 if (separator!=nullptr || sep2!=nullptr) {
1919 if (separator==nullptr || (sep2!=nullptr && separator > sep2)) {
1920 separator = sep2;
1921 }
1922 fieldLen[fieldIdx - 1] = static_cast<int32_t>(separator - field[fieldIdx - 1]);
1923 } else {
1924 fieldLen[fieldIdx - 1] = length - static_cast<int32_t>(field[fieldIdx - 1] - fullName);
1925 }
1926
1927 if (fieldLen[0] >= static_cast<int32_t>(sizeof(language)))
1928 {
1929 break; // error: the language field is too long
1930 }
1931
1932 variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
1933 if (fieldLen[0] > 0) {
1934 /* We have a language */
1935 uprv_memcpy(language, fullName, fieldLen[0])do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (language != __null) ? void (0) :
__assert_fail ("language != __null", __builtin_FILE (), __builtin_LINE
(), __extension__ __PRETTY_FUNCTION__)); (static_cast <bool
> (fullName != __null) ? void (0) : __assert_fail ("fullName != __null"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
)); clang diagnostic pop :: memcpy(language, fullName, fieldLen
[0]); } while (false)
;
1936 language[fieldLen[0]] = 0;
1937 }
1938 if (fieldLen[1] == 4 && uprv_isASCIILetteruprv_isASCIILetter_77(field[1][0]) &&
1939 uprv_isASCIILetteruprv_isASCIILetter_77(field[1][1]) && uprv_isASCIILetteruprv_isASCIILetter_77(field[1][2]) &&
1940 uprv_isASCIILetteruprv_isASCIILetter_77(field[1][3])) {
1941 /* We have at least a script */
1942 uprv_memcpy(script, field[1], fieldLen[1])do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (script != __null) ? void (0) : __assert_fail
("script != __null", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__)); (static_cast <bool> (field[1] !=
__null) ? void (0) : __assert_fail ("field[1] != __null", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); clang
diagnostic pop :: memcpy(script, field[1], fieldLen[1]); } while
(false)
;
1943 script[fieldLen[1]] = 0;
1944 variantField++;
1945 }
1946
1947 if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
1948 /* We have a country */
1949 uprv_memcpy(country, field[variantField], fieldLen[variantField])do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (country != __null) ? void (0) : __assert_fail
("country != __null", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__)); (static_cast <bool> (field[variantField
] != __null) ? void (0) : __assert_fail ("field[variantField] != __null"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
)); clang diagnostic pop :: memcpy(country, field[variantField
], fieldLen[variantField]); } while (false)
;
1950 country[fieldLen[variantField]] = 0;
1951 variantField++;
1952 } else if (fieldLen[variantField] == 0) {
1953 variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
1954 }
1955
1956 if (fieldLen[variantField] > 0) {
1957 /* We have a variant */
1958 variantBegin = static_cast<int32_t>(field[variantField] - fullName);
1959 }
1960
1961 err = U_ZERO_ERROR;
1962 initBaseName(err);
1963 if (U_FAILURE(err)) {
1964 break;
1965 }
1966
1967 if (canonicalize) {
1968 if (!isKnownCanonicalizedLocale(fullName, err)) {
1969 CharString replaced;
1970 // Not sure it is already canonicalized
1971 if (canonicalizeLocale(*this, replaced, err)) {
1972 U_ASSERT(U_SUCCESS(err))(static_cast <bool> (U_SUCCESS(err)) ? void (0) : __assert_fail
("U_SUCCESS(err)", __builtin_FILE (), __builtin_LINE (), __extension__
__PRETTY_FUNCTION__))
;
1973 // If need replacement, call init again.
1974 init(replaced.data(), false);
1975 }
1976 if (U_FAILURE(err)) {
1977 break;
1978 }
1979 }
1980 } // if (canonicalize) {
1981
1982 // successful end of init()
1983 return *this;
1984 } while(0); /*loop doesn't iterate*/
1985
1986 // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
1987 setToBogus();
1988
1989 return *this;
1990}
1991
1992/*
1993 * Set up the base name.
1994 * If there are no key words, it's exactly the full name.
1995 * If key words exist, it's the full name truncated at the '@' character.
1996 * Need to set up both at init() and after setting a keyword.
1997 */
1998void
1999Locale::initBaseName(UErrorCode &status) {
2000 if (U_FAILURE(status)) {
2001 return;
2002 }
2003 U_ASSERT(baseName==nullptr || baseName==fullName)(static_cast <bool> (baseName==nullptr || baseName==fullName
) ? void (0) : __assert_fail ("baseName==nullptr || baseName==fullName"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
2004 const char *atPtr = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
2005 const char *eqPtr = uprv_strchr(fullName, '='):: strchr(fullName, '=');
2006 if (atPtr && eqPtr && atPtr < eqPtr) {
2007 // Key words exist.
2008 int32_t baseNameLength = static_cast<int32_t>(atPtr - fullName);
2009 char* newBaseName = static_cast<char*>(uprv_mallocuprv_malloc_77(baseNameLength + 1));
2010 if (newBaseName == nullptr) {
2011 status = U_MEMORY_ALLOCATION_ERROR;
2012 return;
2013 }
2014 baseName = newBaseName;
2015 uprv_strncpy(baseName, fullName, baseNameLength):: strncpy(baseName, fullName, baseNameLength);
2016 baseName[baseNameLength] = 0;
2017
2018 // The original computation of variantBegin leaves it equal to the length
2019 // of fullName if there is no variant. It should instead be
2020 // the length of the baseName.
2021 if (variantBegin > baseNameLength) {
2022 variantBegin = baseNameLength;
2023 }
2024 } else {
2025 baseName = fullName;
2026 }
2027}
2028
2029
2030int32_t
2031Locale::hashCode() const
2032{
2033 return ustr_hashCharsNustr_hashCharsN_77(fullName, static_cast<int32_t>(uprv_strlen(fullName):: strlen(fullName)));
2034}
2035
2036void
2037Locale::setToBogus() {
2038 /* Free our current storage */
2039 if((baseName != fullName) && (baseName != fullNameBuffer)) {
2040 uprv_freeuprv_free_77(baseName);
2041 }
2042 baseName = nullptr;
2043 if(fullName != fullNameBuffer) {
2044 uprv_freeuprv_free_77(fullName);
2045 fullName = fullNameBuffer;
2046 }
2047 *fullNameBuffer = 0;
2048 *language = 0;
2049 *script = 0;
2050 *country = 0;
2051 fIsBogus = true;
2052 variantBegin = 0;
2053}
2054
2055const Locale& U_EXPORT2
2056Locale::getDefault()
2057{
2058 {
2059 Mutex lock(&gDefaultLocaleMutex);
2060 if (gDefaultLocale != nullptr) {
2061 return *gDefaultLocale;
2062 }
2063 }
2064 UErrorCode status = U_ZERO_ERROR;
2065 return *locale_set_default_internal(nullptr, status);
2066}
2067
2068
2069
2070void U_EXPORT2
2071Locale::setDefault( const Locale& newLocale,
2072 UErrorCode& status)
2073{
2074 if (U_FAILURE(status)) {
2075 return;
2076 }
2077
2078 /* Set the default from the full name string of the supplied locale.
2079 * This is a convenient way to access the default locale caching mechanisms.
2080 */
2081 const char *localeID = newLocale.getName();
2082 locale_set_default_internal(localeID, status);
2083}
2084
2085void
2086Locale::addLikelySubtags(UErrorCode& status) {
2087 if (U_FAILURE(status)) {
2088 return;
2089 }
2090
2091 CharString maximizedLocaleID = ulocimp_addLikelySubtagsulocimp_addLikelySubtags_77(fullName, status);
2092
2093 if (U_FAILURE(status)) {
2094 return;
2095 }
2096
2097 init(maximizedLocaleID.data(), /*canonicalize=*/false);
2098 if (isBogus()) {
2099 status = U_ILLEGAL_ARGUMENT_ERROR;
2100 }
2101}
2102
2103void
2104Locale::minimizeSubtags(UErrorCode& status) {
2105 Locale::minimizeSubtags(false, status);
2106}
2107void
2108Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
2109 if (U_FAILURE(status)) {
2110 return;
2111 }
2112
2113 CharString minimizedLocaleID = ulocimp_minimizeSubtagsulocimp_minimizeSubtags_77(fullName, favorScript, status);
2114
2115 if (U_FAILURE(status)) {
2116 return;
2117 }
2118
2119 init(minimizedLocaleID.data(), /*canonicalize=*/false);
2120 if (isBogus()) {
2121 status = U_ILLEGAL_ARGUMENT_ERROR;
2122 }
2123}
2124
2125void
2126Locale::canonicalize(UErrorCode& status) {
2127 if (U_FAILURE(status)) {
2128 return;
2129 }
2130 if (isBogus()) {
2131 status = U_ILLEGAL_ARGUMENT_ERROR;
2132 return;
2133 }
2134 CharString uncanonicalized(fullName, status);
2135 if (U_FAILURE(status)) {
2136 return;
2137 }
2138 init(uncanonicalized.data(), /*canonicalize=*/true);
2139 if (isBogus()) {
2140 status = U_ILLEGAL_ARGUMENT_ERROR;
2141 }
2142}
2143
2144Locale U_EXPORT2
2145Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
2146{
2147 Locale result(Locale::eBOGUS);
2148
2149 if (U_FAILURE(status)) {
2150 return result;
2151 }
2152
2153 // If a BCP 47 language tag is passed as the language parameter to the
2154 // normal Locale constructor, it will actually fall back to invoking
2155 // uloc_forLanguageTag() to parse it if it somehow is able to detect that
2156 // the string actually is BCP 47. This works well for things like strings
2157 // using BCP 47 extensions, but it does not at all work for things like
2158 // legacy language tags (marked as “Type: grandfathered” in BCP 47,
2159 // e.g., "en-GB-oed") which are possible to also
2160 // interpret as ICU locale IDs and because of that won't trigger the BCP 47
2161 // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
2162 // and then Locale::init(), instead of just calling the normal constructor.
2163
2164 int32_t parsedLength;
2165 CharString localeID = ulocimp_forLanguageTagulocimp_forLanguageTag_77(
2166 tag.data(),
2167 tag.length(),
2168 &parsedLength,
2169 status);
2170
2171 if (U_FAILURE(status)) {
2172 return result;
2173 }
2174
2175 if (parsedLength != tag.size()) {
2176 status = U_ILLEGAL_ARGUMENT_ERROR;
2177 return result;
2178 }
2179
2180 result.init(localeID.data(), /*canonicalize=*/false);
2181 if (result.isBogus()) {
2182 status = U_ILLEGAL_ARGUMENT_ERROR;
2183 }
2184 return result;
2185}
2186
2187void
2188Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
2189{
2190 if (U_FAILURE(status)) {
2191 return;
2192 }
2193
2194 if (fIsBogus) {
2195 status = U_ILLEGAL_ARGUMENT_ERROR;
2196 return;
2197 }
2198
2199 ulocimp_toLanguageTagulocimp_toLanguageTag_77(fullName, sink, /*strict=*/false, status);
2200}
2201
2202Locale U_EXPORT2
2203Locale::createFromName (const char *name)
2204{
2205 if (name) {
2206 Locale l("");
2207 l.init(name, false);
2208 return l;
2209 }
2210 else {
2211 return getDefault();
2212 }
2213}
2214
2215Locale U_EXPORT2
2216Locale::createFromName(StringPiece name) {
2217 Locale loc("");
2218 loc.init(name, false);
2219 return loc;
2220}
2221
2222Locale U_EXPORT2
2223Locale::createCanonical(const char* name) {
2224 Locale loc("");
2225 loc.init(name, true);
2226 return loc;
2227}
2228
2229const char *
2230Locale::getISO3Language() const
2231{
2232 return uloc_getISO3Languageuloc_getISO3Language_77(fullName);
2233}
2234
2235
2236const char *
2237Locale::getISO3Country() const
2238{
2239 return uloc_getISO3Countryuloc_getISO3Country_77(fullName);
2240}
2241
2242/**
2243 * Return the LCID value as specified in the "LocaleID" resource for this
2244 * locale. The LocaleID must be expressed as a hexadecimal number, from
2245 * one to four digits. If the LocaleID resource is not present, or is
2246 * in an incorrect format, 0 is returned. The LocaleID is for use in
2247 * Windows (it is an LCID), but is available on all platforms.
2248 */
2249uint32_t
2250Locale::getLCID() const
2251{
2252 return uloc_getLCIDuloc_getLCID_77(fullName);
2253}
2254
2255const char* const* U_EXPORT2 Locale::getISOCountries()
2256{
2257 return uloc_getISOCountriesuloc_getISOCountries_77();
2258}
2259
2260const char* const* U_EXPORT2 Locale::getISOLanguages()
2261{
2262 return uloc_getISOLanguagesuloc_getISOLanguages_77();
2263}
2264
2265// Set the locale's data based on a posix id.
2266void Locale::setFromPOSIXID(const char *posixID)
2267{
2268 init(posixID, true);
2269}
2270
2271const Locale & U_EXPORT2
2272Locale::getRoot()
2273{
2274 return getLocale(eROOT);
2275}
2276
2277const Locale & U_EXPORT2
2278Locale::getEnglish()
2279{
2280 return getLocale(eENGLISH);
2281}
2282
2283const Locale & U_EXPORT2
2284Locale::getFrench()
2285{
2286 return getLocale(eFRENCH);
2287}
2288
2289const Locale & U_EXPORT2
2290Locale::getGerman()
2291{
2292 return getLocale(eGERMAN);
2293}
2294
2295const Locale & U_EXPORT2
2296Locale::getItalian()
2297{
2298 return getLocale(eITALIAN);
2299}
2300
2301const Locale & U_EXPORT2
2302Locale::getJapanese()
2303{
2304 return getLocale(eJAPANESE);
2305}
2306
2307const Locale & U_EXPORT2
2308Locale::getKorean()
2309{
2310 return getLocale(eKOREAN);
2311}
2312
2313const Locale & U_EXPORT2
2314Locale::getChinese()
2315{
2316 return getLocale(eCHINESE);
2317}
2318
2319const Locale & U_EXPORT2
2320Locale::getSimplifiedChinese()
2321{
2322 return getLocale(eCHINA);
2323}
2324
2325const Locale & U_EXPORT2
2326Locale::getTraditionalChinese()
2327{
2328 return getLocale(eTAIWAN);
2329}
2330
2331
2332const Locale & U_EXPORT2
2333Locale::getFrance()
2334{
2335 return getLocale(eFRANCE);
2336}
2337
2338const Locale & U_EXPORT2
2339Locale::getGermany()
2340{
2341 return getLocale(eGERMANY);
2342}
2343
2344const Locale & U_EXPORT2
2345Locale::getItaly()
2346{
2347 return getLocale(eITALY);
2348}
2349
2350const Locale & U_EXPORT2
2351Locale::getJapan()
2352{
2353 return getLocale(eJAPAN);
2354}
2355
2356const Locale & U_EXPORT2
2357Locale::getKorea()
2358{
2359 return getLocale(eKOREA);
2360}
2361
2362const Locale & U_EXPORT2
2363Locale::getChina()
2364{
2365 return getLocale(eCHINA);
2366}
2367
2368const Locale & U_EXPORT2
2369Locale::getPRC()
2370{
2371 return getLocale(eCHINA);
2372}
2373
2374const Locale & U_EXPORT2
2375Locale::getTaiwan()
2376{
2377 return getLocale(eTAIWAN);
2378}
2379
2380const Locale & U_EXPORT2
2381Locale::getUK()
2382{
2383 return getLocale(eUK);
2384}
2385
2386const Locale & U_EXPORT2
2387Locale::getUS()
2388{
2389 return getLocale(eUS);
2390}
2391
2392const Locale & U_EXPORT2
2393Locale::getCanada()
2394{
2395 return getLocale(eCANADA);
2396}
2397
2398const Locale & U_EXPORT2
2399Locale::getCanadaFrench()
2400{
2401 return getLocale(eCANADA_FRENCH);
2402}
2403
2404const Locale &
2405Locale::getLocale(int locid)
2406{
2407 Locale *localeCache = getLocaleCache();
2408 U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0))(static_cast <bool> ((locid < eMAX_LOCALES)&&
(locid>=0)) ? void (0) : __assert_fail ("(locid < eMAX_LOCALES)&&(locid>=0)"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
))
;
2409 if (localeCache == nullptr) {
2410 // Failure allocating the locale cache.
2411 // The best we can do is return a nullptr reference.
2412 locid = 0;
2413 }
2414 return localeCache[locid]; /*operating on nullptr*/
2415}
2416
2417/*
2418This function is defined this way in order to get around static
2419initialization and static destruction.
2420 */
2421Locale *
2422Locale::getLocaleCache()
2423{
2424 UErrorCode status = U_ZERO_ERROR;
2425 umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
2426 return gLocaleCache;
2427}
2428
2429class KeywordEnumeration : public StringEnumeration {
2430protected:
2431 CharString keywords;
2432private:
2433 const char *current;
2434 static const char fgClassID;
2435
2436public:
2437 static UClassID U_EXPORT2 getStaticClassID() { return (UClassID)&fgClassID; }
2438 virtual UClassID getDynamicClassID() const override { return getStaticClassID(); }
2439public:
2440 KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
2441 : keywords(), current(keywords.data()) {
2442 if(U_SUCCESS(status) && keywordLen != 0) {
2443 if(keys == nullptr || keywordLen < 0) {
2444 status = U_ILLEGAL_ARGUMENT_ERROR;
2445 } else {
2446 keywords.append(keys, keywordLen, status);
2447 current = keywords.data() + currentIndex;
2448 }
2449 }
2450 }
2451
2452 virtual ~KeywordEnumeration();
2453
2454 virtual StringEnumeration * clone() const override
2455 {
2456 UErrorCode status = U_ZERO_ERROR;
2457 return new KeywordEnumeration(
2458 keywords.data(), keywords.length(),
2459 static_cast<int32_t>(current - keywords.data()), status);
2460 }
2461
2462 virtual int32_t count(UErrorCode& status) const override {
2463 if (U_FAILURE(status)) { return 0; }
2464 const char *kw = keywords.data();
2465 int32_t result = 0;
2466 while(*kw) {
2467 result++;
2468 kw += uprv_strlen(kw):: strlen(kw)+1;
2469 }
2470 return result;
2471 }
2472
2473 virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
2474 const char* result;
2475 int32_t len;
2476 if(U_SUCCESS(status) && *current != 0) {
2477 result = current;
2478 len = static_cast<int32_t>(uprv_strlen(current):: strlen(current));
2479 current += len+1;
2480 if(resultLength != nullptr) {
2481 *resultLength = len;
2482 }
2483 } else {
2484 if(resultLength != nullptr) {
2485 *resultLength = 0;
2486 }
2487 result = nullptr;
2488 }
2489 return result;
2490 }
2491
2492 virtual const UnicodeString* snext(UErrorCode& status) override {
2493 if (U_FAILURE(status)) { return nullptr; }
2494 int32_t resultLength = 0;
2495 const char *s = next(&resultLength, status);
2496 return setChars(s, resultLength, status);
2497 }
2498
2499 virtual void reset(UErrorCode& status) override {
2500 if (U_FAILURE(status)) { return; }
2501 current = keywords.data();
2502 }
2503};
2504
2505const char KeywordEnumeration::fgClassID = '\0';
2506
2507// Out-of-line virtual destructor to serve as the "key function".
2508KeywordEnumeration::~KeywordEnumeration() = default;
2509
2510// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
2511// the next() method for each keyword before returning it.
2512class UnicodeKeywordEnumeration : public KeywordEnumeration {
2513public:
2514 using KeywordEnumeration::KeywordEnumeration;
2515 virtual ~UnicodeKeywordEnumeration();
2516
2517 virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
2518 const char* legacy_key = KeywordEnumeration::next(nullptr, status);
2519 while (U_SUCCESS(status) && legacy_key != nullptr) {
2520 const char* key = uloc_toUnicodeLocaleKeyuloc_toUnicodeLocaleKey_77(legacy_key);
2521 if (key != nullptr) {
2522 if (resultLength != nullptr) {
2523 *resultLength = static_cast<int32_t>(uprv_strlen(key):: strlen(key));
2524 }
2525 return key;
2526 }
2527 // Not a Unicode keyword, could be a t, x or other, continue to look at the next one.
2528 legacy_key = KeywordEnumeration::next(nullptr, status);
2529 }
2530 if (resultLength != nullptr) *resultLength = 0;
2531 return nullptr;
2532 }
2533 virtual int32_t count(UErrorCode& status) const override {
2534 if (U_FAILURE(status)) { return 0; }
2535 const char *kw = keywords.data();
2536 int32_t result = 0;
2537 while(*kw) {
2538 if (uloc_toUnicodeLocaleKeyuloc_toUnicodeLocaleKey_77(kw) != nullptr) {
2539 result++;
2540 }
2541 kw += uprv_strlen(kw):: strlen(kw)+1;
2542 }
2543 return result;
2544 }
2545};
2546
2547// Out-of-line virtual destructor to serve as the "key function".
2548UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
2549
2550StringEnumeration *
2551Locale::createKeywords(UErrorCode &status) const
2552{
2553 StringEnumeration *result = nullptr;
2554
2555 if (U_FAILURE(status)) {
2556 return result;
2557 }
2558
2559 const char* variantStart = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
2560 const char* assignment = uprv_strchr(fullName, '='):: strchr(fullName, '=');
2561 if(variantStart) {
2562 if(assignment > variantStart) {
2563 CharString keywords = ulocimp_getKeywordsulocimp_getKeywords_77(variantStart + 1, '@', false, status);
2564 if (U_SUCCESS(status) && !keywords.isEmpty()) {
2565 result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status);
2566 if (!result) {
2567 status = U_MEMORY_ALLOCATION_ERROR;
2568 }
2569 }
2570 } else {
2571 status = U_INVALID_FORMAT_ERROR;
2572 }
2573 }
2574 return result;
2575}
2576
2577StringEnumeration *
2578Locale::createUnicodeKeywords(UErrorCode &status) const
2579{
2580 StringEnumeration *result = nullptr;
2581
2582 if (U_FAILURE(status)) {
2583 return result;
2584 }
2585
2586 const char* variantStart = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
2587 const char* assignment = uprv_strchr(fullName, '='):: strchr(fullName, '=');
2588 if(variantStart) {
2589 if(assignment > variantStart) {
2590 CharString keywords = ulocimp_getKeywordsulocimp_getKeywords_77(variantStart + 1, '@', false, status);
2591 if (U_SUCCESS(status) && !keywords.isEmpty()) {
2592 result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status);
2593 if (!result) {
2594 status = U_MEMORY_ALLOCATION_ERROR;
2595 }
2596 }
2597 } else {
2598 status = U_INVALID_FORMAT_ERROR;
2599 }
2600 }
2601 return result;
2602}
2603
2604int32_t
2605Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
2606{
2607 return uloc_getKeywordValueuloc_getKeywordValue_77(fullName, keywordName, buffer, bufLen, &status);
2608}
2609
2610void
2611Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
2612 if (U_FAILURE(status)) {
2613 return;
2614 }
2615
2616 if (fIsBogus) {
2617 status = U_ILLEGAL_ARGUMENT_ERROR;
2618 return;
2619 }
2620
2621 ulocimp_getKeywordValueulocimp_getKeywordValue_77(fullName, keywordName, sink, status);
2622}
2623
2624void
2625Locale::getUnicodeKeywordValue(StringPiece keywordName,
2626 ByteSink& sink,
2627 UErrorCode& status) const {
2628 if (U_FAILURE(status)) {
2629 return;
2630 }
2631
2632 std::optional<std::string_view> legacy_key = ulocimp_toLegacyKeyWithFallbackulocimp_toLegacyKeyWithFallback_77(keywordName);
2633 if (!legacy_key.has_value()) {
2634 status = U_ILLEGAL_ARGUMENT_ERROR;
2635 return;
2636 }
2637
2638 auto legacy_value = getKeywordValue<CharString>(*legacy_key, status);
2639
2640 if (U_FAILURE(status)) {
2641 return;
2642 }
2643
2644 std::optional<std::string_view> unicode_value =
2645 ulocimp_toBcpTypeWithFallbackulocimp_toBcpTypeWithFallback_77(keywordName, legacy_value.toStringPiece());
2646 if (!unicode_value.has_value()) {
2647 status = U_ILLEGAL_ARGUMENT_ERROR;
2648 return;
2649 }
2650
2651 sink.Append(unicode_value->data(), static_cast<int32_t>(unicode_value->size()));
2652}
2653
// Sets a keyword value in this locale's full name, in place.
//
// The keyword portion of fullName (from `offset` to the end) is handed to
// ulocimp_setKeywordValue(), which writes its output back into the same storage
// through a CheckedArrayByteSink. If that storage is too small, a larger heap
// buffer is allocated, the current name is copied into it, and the call is retried.
//
// keywordName:  keyword to set; an empty name yields U_ILLEGAL_ARGUMENT_ERROR.
// keywordValue: passed through unchanged to ulocimp_setKeywordValue().
// status:       in/out error code. Nothing happens if it already indicates failure.
//               Set to U_MEMORY_ALLOCATION_ERROR when the larger buffer cannot be
//               allocated; other failures from ulocimp_setKeywordValue() are
//               returned as-is.
2654void
2655Locale::setKeywordValue(StringPiece keywordName,
2656 StringPiece keywordValue,
2657 UErrorCode& status) {
2658 if (U_FAILURE(status)) { return; }
2659 if (keywordName.empty()) {
2660 status = U_ILLEGAL_ARGUMENT_ERROR;
2661 return;
2662 }
 // This warning is not a failure, but it is cleared so the exact-value status
 // comparisons below start from U_ZERO_ERROR.
2663 if (status == U_STRING_NOT_TERMINATED_WARNING) {
2664 status = U_ZERO_ERROR;
2665 }
2666
 // capacity is the size of the storage behind fullName: the fixed inline buffer
 // size when fullName still points at fullNameBuffer, otherwise length + 1.
2667 int32_t length = static_cast<int32_t>(uprv_strlen(fullName):: strlen(fullName));
2668 int32_t capacity = fullName == fullNameBuffer ? ULOC_FULLNAME_CAPACITY157 : length + 1;
2669
 // offset is where the keyword portion begins; when locale_getKeywordsStart()
 // returns nullptr it is the end of the string.
2670 const char* start = locale_getKeywordsStartlocale_getKeywordsStart_77(fullName);
2671 int32_t offset = start == nullptr ? length : start - fullName;
2672
 // Runs at most until the call succeeds or fails with something other than
 // U_BUFFER_OVERFLOW_ERROR; each overflow enlarges the buffer and retries.
2673 for (;;) {
2674 // Remove -1 from the capacity so that this function can guarantee NUL termination.
2675 CheckedArrayByteSink sink(fullName + offset, capacity - offset - 1);
2676
 // The input view and the sink both refer to fullName + offset.
2677 int32_t reslen = ulocimp_setKeywordValueulocimp_setKeywordValue_77(
2678 {fullName + offset, static_cast<std::string_view::size_type>(length - offset)},
2679 keywordName,
2680 keywordValue,
2681 sink,
2682 status);
2683
2684 if (status == U_BUFFER_OVERFLOW_ERROR) {
 // presumably reslen is the full length needed for the keyword portion — TODO confirm;
 // the new buffer holds the prefix (offset), that portion, and a NUL.
2685 capacity = reslen + offset + 1;
2686 char* newFullName = static_cast<char*>(uprv_mallocuprv_malloc_77(capacity));
2687 if (newFullName == nullptr) {
2688 status = U_MEMORY_ALLOCATION_ERROR;
2689 return;
2690 }
 // Copies length + 1 bytes of the current buffer into the new one.
2691 uprv_memcpy(newFullName, fullName, length + 1)do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (newFullName != __null) ? void (0
) : __assert_fail ("newFullName != __null", __builtin_FILE ()
, __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); (static_cast
<bool> (fullName != __null) ? void (0) : __assert_fail
("fullName != __null", __builtin_FILE (), __builtin_LINE (),
__extension__ __PRETTY_FUNCTION__)); clang diagnostic pop ::
memcpy(newFullName, fullName, length + 1); } while (false)
;
 // NOTE(review): baseName is repointed only when the old buffer was on the heap.
 // If the inline fullNameBuffer overflowed while baseName aliased it, baseName keeps
 // pointing at fullNameBuffer and the baseName == fullName check after the loop is
 // false — confirm the sink cannot have disturbed those bytes.
2692 if (fullName != fullNameBuffer) {
2693 if (baseName == fullName) {
2694 baseName = newFullName; // baseName should not point to freed memory.
2695 }
2696 // if fullName is already on the heap, need to free it.
2697 uprv_freeuprv_free_77(fullName);
2698 }
2699 fullName = newFullName;
 // Clear the overflow so the retry starts from a clean status.
2700 status = U_ZERO_ERROR;
2701 continue;
2702 }
2703
2704 if (U_FAILURE(status)) { return; }
 // Terminates the result at reslen + offset within the capacity-sized buffer.
2705 u_terminateCharsu_terminateChars_77(fullName, capacity, reslen + offset, &status);
2706 break;
2707 }
2708
2709 if (baseName == fullName) {
2710 // May have added the first keyword, meaning that the fullName is no longer also the baseName.
2711 initBaseName(status);
2712 }
2713}
2714
2715void
2716Locale::setUnicodeKeywordValue(StringPiece keywordName,
2717 StringPiece keywordValue,
2718 UErrorCode& status) {
2719 if (U_FAILURE(status)) {
2720 return;
2721 }
2722
2723 std::optional<std::string_view> legacy_key = ulocimp_toLegacyKeyWithFallbackulocimp_toLegacyKeyWithFallback_77(keywordName);
2724 if (!legacy_key.has_value()) {
2725 status = U_ILLEGAL_ARGUMENT_ERROR;
2726 return;
2727 }
2728
2729 std::string_view value;
2730
2731 if (!keywordValue.empty()) {
2732 std::optional<std::string_view> legacy_value =
2733 ulocimp_toLegacyTypeWithFallbackulocimp_toLegacyTypeWithFallback_77(keywordName, keywordValue);
2734 if (!legacy_value.has_value()) {
2735 status = U_ILLEGAL_ARGUMENT_ERROR;
2736 return;
2737 }
2738 value = *legacy_value;
2739 }
2740
2741 setKeywordValue(*legacy_key, value, status);
2742}
2743
2744const char *
2745Locale::getBaseName() const {
2746 return baseName;
2747}
2748
// Out-of-line definition of the defaulted destructor of Locale::Iterator.
2749Locale::Iterator::~Iterator() = default;
2750
2751//eof
2752U_NAMESPACE_END}