Bug Summary

File:root/firefox-clang/intl/icu/source/common/ucnvscsu.cpp
Warning:line 1941, column 9
Value stored to 'targetCapacity' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ucnvscsu.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/config/gcc_hidden.h -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -D U_COMMON_IMPLEMENTATION -D _LIBCPP_DISABLE_DEPRECATION_WARNINGS -D U_USING_ICU_NAMESPACE=0 -D U_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -D U_HIDE_OBSOLETE_UTF_OLD_H=1 -D UCONFIG_NO_LEGACY_CONVERSION -D UCONFIG_NO_TRANSLITERATION -D UCONFIG_NO_REGULAR_EXPRESSIONS -D UCONFIG_NO_BREAK_ITERATION -D UCONFIG_NO_IDNA -D UCONFIG_NO_MF2 -D U_CHARSET_IS_UTF8 -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D U_ENABLE_DYLOAD=0 -D U_DEBUG=1 -I 
/root/firefox-clang/config/external/icu/common -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/common -I /root/firefox-clang/intl/icu/source/i18n -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=pessimizing-move -Wno-error=large-by-value-copy=128 -Wno-error=implicit-int-float-conversion -Wno-error=thread-safety-analysis -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-comma -Wno-implicit-const-int-float-conversion -Wno-macro-redefined -Wno-microsoft-include -Wno-tautological-unsigned-enum-zero-compare -Wno-unreachable-code-loop-increment -Wno-unreachable-code-return -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fno-sized-deallocation -fno-aligned-allocation 
-vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-27-100320-3286336-1 -x c++ /root/firefox-clang/intl/icu/source/common/ucnvscsu.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6* Copyright (C) 2000-2016, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10* file name: ucnvscsu.c
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2000nov18
16* created by: Markus W. Scherer
17*
18* This is an implementation of the Standard Compression Scheme for Unicode
19* as defined in https://www.unicode.org/reports/tr6/ .
20* Reserved commands and window settings are treated as illegal sequences and
21* will result in callback calls.
22*/
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_CONVERSION0 && !UCONFIG_ONLY_HTML_CONVERSION0
27
28#include "unicode/ucnv.h"
29#include "unicode/ucnv_cb.h"
30#include "unicode/utf16.h"
31#include "ucnv_bld.h"
32#include "ucnv_cnv.h"
33#include "cmemory.h"
34
35/* SCSU definitions --------------------------------------------------------- */
36
37/* SCSU command byte values */
38enum {
39 SQ0=0x01, /* Quote from window pair 0 */
40 SQ7=0x08, /* Quote from window pair 7 */
41 SDX=0x0B, /* Define a window as extended */
42 Srs=0x0C, /* reserved */
43 SQU=0x0E, /* Quote a single Unicode character */
44 SCU=0x0F, /* Change to Unicode mode */
45 SC0=0x10, /* Select window 0 */
46 SC7=0x17, /* Select window 7 */
47 SD0=0x18, /* Define and select window 0 */
48 SD7=0x1F, /* Define and select window 7 */
49
50 UC0=0xE0, /* Select window 0 */
51 UC7=0xE7, /* Select window 7 */
52 UD0=0xE8, /* Define and select window 0 */
53 UD7=0xEF, /* Define and select window 7 */
54 UQU=0xF0, /* Quote a single Unicode character */
55 UDX=0xF1, /* Define a Window as extended */
56 Urs=0xF2 /* reserved */
57};
58
59enum {
60 /*
61 * Unicode code points from 3400 to E000 are not addressable by
62 * dynamic window, since in these areas no short run alphabets are
63 * found. Therefore add gapOffset to all values from gapThreshold.
64 */
65 gapThreshold=0x68,
66 gapOffset=0xAC00,
67
68 /* values between reservedStart and fixedThreshold are reserved */
69 reservedStart=0xA8,
70
71 /* use table of predefined fixed offsets for values from fixedThreshold */
72 fixedThreshold=0xF9
73};
74
75/* constant offsets for the 8 static windows */
76static const uint32_t staticOffsets[8]={
77 0x0000, /* ASCII for quoted tags */
78 0x0080, /* Latin - 1 Supplement (for access to punctuation) */
79 0x0100, /* Latin Extended-A */
80 0x0300, /* Combining Diacritical Marks */
81 0x2000, /* General Punctuation */
82 0x2080, /* Currency Symbols */
83 0x2100, /* Letterlike Symbols and Number Forms */
84 0x3000 /* CJK Symbols and punctuation */
85};
86
87/* initial offsets for the 8 dynamic (sliding) windows */
88static const uint32_t initialDynamicOffsets[8]={
89 0x0080, /* Latin-1 */
90 0x00C0, /* Latin Extended A */
91 0x0400, /* Cyrillic */
92 0x0600, /* Arabic */
93 0x0900, /* Devanagari */
94 0x3040, /* Hiragana */
95 0x30A0, /* Katakana */
96 0xFF00 /* Fullwidth ASCII */
97};
98
99/* Table of fixed predefined Offsets */
100static const uint32_t fixedOffsets[]={
101 /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */
102 /* 0xFA */ 0x0250, /* IPA extensions */
103 /* 0xFB */ 0x0370, /* Greek */
104 /* 0xFC */ 0x0530, /* Armenian */
105 /* 0xFD */ 0x3040, /* Hiragana */
106 /* 0xFE */ 0x30A0, /* Katakana */
107 /* 0xFF */ 0xFF60 /* Halfwidth Katakana */
108};
109
110/* state values */
111enum {
112 readCommand,
113 quotePairOne,
114 quotePairTwo,
115 quoteOne,
116 definePairOne,
117 definePairTwo,
118 defineOne
119};
120
121typedef struct SCSUData {
122 /* dynamic window offsets, initialize to default values from initialDynamicOffsets */
123 uint32_t toUDynamicOffsets[8];
124 uint32_t fromUDynamicOffsets[8];
125
126 /* state machine state - toUnicode */
127 UBool toUIsSingleByteMode;
128 uint8_t toUState;
129 int8_t toUQuoteWindow, toUDynamicWindow;
130 uint8_t toUByteOne;
131 uint8_t toUPadding[3];
132
133 /* state machine state - fromUnicode */
134 UBool fromUIsSingleByteMode;
135 int8_t fromUDynamicWindow;
136
137 /*
138 * windowUse[] keeps track of the use of the dynamic windows:
139 * At nextWindowUseIndex there is the least recently used window,
140 * and the following windows (in a wrapping manner) are more and more
141 * recently used.
142 * At nextWindowUseIndex-1 there is the most recently used window.
143 */
144 uint8_t locale;
145 int8_t nextWindowUseIndex;
146 int8_t windowUse[8];
147} SCSUData;
148
149static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 };
150static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 };
151
152enum {
153 lGeneric, l_ja
154};
155
156/* SCSU setup functions ----------------------------------------------------- */
157U_CDECL_BEGINextern "C" {
158static void U_CALLCONV
159_SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
160 SCSUData *scsu=(SCSUData *)cnv->extraInfo;
161
162 if(choice<=UCNV_RESET_TO_UNICODE) {
163 /* reset toUnicode */
164 uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32)do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (scsu->toUDynamicOffsets != __null
) ? void (0) : __assert_fail ("scsu->toUDynamicOffsets != __null"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
)); (static_cast <bool> (initialDynamicOffsets != __null
) ? void (0) : __assert_fail ("initialDynamicOffsets != __null"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
)); clang diagnostic pop :: memcpy(scsu->toUDynamicOffsets
, initialDynamicOffsets, 32); } while (false)
;
165
166 scsu->toUIsSingleByteMode=true;
167 scsu->toUState=readCommand;
168 scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
169 scsu->toUByteOne=0;
170
171 cnv->toULength=0;
172 }
173 if(choice!=UCNV_RESET_TO_UNICODE) {
174 /* reset fromUnicode */
175 uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32)do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (scsu->fromUDynamicOffsets != __null
) ? void (0) : __assert_fail ("scsu->fromUDynamicOffsets != __null"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
)); (static_cast <bool> (initialDynamicOffsets != __null
) ? void (0) : __assert_fail ("initialDynamicOffsets != __null"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
)); clang diagnostic pop :: memcpy(scsu->fromUDynamicOffsets
, initialDynamicOffsets, 32); } while (false)
;
176
177 scsu->fromUIsSingleByteMode=true;
178 scsu->fromUDynamicWindow=0;
179
180 scsu->nextWindowUseIndex=0;
181 switch(scsu->locale) {
182 case l_ja:
183 uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8)do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (scsu->windowUse != __null) ? void
(0) : __assert_fail ("scsu->windowUse != __null", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); (
static_cast <bool> (initialWindowUse_ja != __null) ? void
(0) : __assert_fail ("initialWindowUse_ja != __null", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); clang
diagnostic pop :: memcpy(scsu->windowUse, initialWindowUse_ja
, 8); } while (false)
;
184 break;
185 default:
186 uprv_memcpy(scsu->windowUse, initialWindowUse, 8)do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (scsu->windowUse != __null) ? void
(0) : __assert_fail ("scsu->windowUse != __null", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); (
static_cast <bool> (initialWindowUse != __null) ? void (
0) : __assert_fail ("initialWindowUse != __null", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); clang
diagnostic pop :: memcpy(scsu->windowUse, initialWindowUse
, 8); } while (false)
;
187 break;
188 }
189
190 cnv->fromUChar32=0;
191 }
192}
193
194static void U_CALLCONV
195_SCSUOpen(UConverter *cnv,
196 UConverterLoadArgs *pArgs,
197 UErrorCode *pErrorCode) {
198 const char *locale=pArgs->locale;
199 if(pArgs->onlyTestIsLoadable) {
200 return;
201 }
202 cnv->extraInfo=uprv_mallocuprv_malloc_77(sizeof(SCSUData));
203 if(cnv->extraInfo!=nullptr) {
204 if(locale!=nullptr && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
205 ((SCSUData *)cnv->extraInfo)->locale=l_ja;
206 } else {
207 ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
208 }
209 _SCSUReset(cnv, UCNV_RESET_BOTH);
210 } else {
211 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
212 }
213
214 /* Set the substitution character U+fffd as a Unicode string. */
215 cnv->subUChars[0]=0xfffd;
216 cnv->subCharLen=-1;
217}
218
219static void U_CALLCONV
220_SCSUClose(UConverter *cnv) {
221 if(cnv->extraInfo!=nullptr) {
222 if(!cnv->isExtraLocal) {
223 uprv_freeuprv_free_77(cnv->extraInfo);
224 }
225 cnv->extraInfo=nullptr;
226 }
227}
228
229/* SCSU-to-Unicode conversion functions ------------------------------------- */
230
231static void U_CALLCONV
232_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
233 UErrorCode *pErrorCode) {
234 UConverter *cnv;
235 SCSUData *scsu;
236 const uint8_t *source, *sourceLimit;
237 char16_t *target;
238 const char16_t *targetLimit;
239 int32_t *offsets;
240 UBool isSingleByteMode;
241 uint8_t state, byteOne;
242 int8_t quoteWindow, dynamicWindow;
243
244 int32_t sourceIndex, nextSourceIndex;
245
246 uint8_t b;
247
248 /* set up the local pointers */
249 cnv=pArgs->converter;
250 scsu=(SCSUData *)cnv->extraInfo;
251
252 source=(const uint8_t *)pArgs->source;
253 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
254 target=pArgs->target;
255 targetLimit=pArgs->targetLimit;
256 offsets=pArgs->offsets;
257
258 /* get the state machine state */
259 isSingleByteMode=scsu->toUIsSingleByteMode;
260 state=scsu->toUState;
261 quoteWindow=scsu->toUQuoteWindow;
262 dynamicWindow=scsu->toUDynamicWindow;
263 byteOne=scsu->toUByteOne;
264
265 /* sourceIndex=-1 if the current character began in the previous buffer */
266 sourceIndex=state==readCommand ? 0 : -1;
267 nextSourceIndex=0;
268
269 /*
270 * conversion "loop"
271 *
272 * For performance, this is not a normal C loop.
273 * Instead, there are two code blocks for the two SCSU modes.
274 * The function branches to either one, and a change of the mode is done with a goto to
275 * the other branch.
276 *
277 * Each branch has two conventional loops:
278 * - a fast-path loop for the most common codes in the mode
279 * - a loop for all other codes in the mode
280 * When the fast-path runs into a code that it cannot handle, its loop ends and it
281 * runs into the following loop to handle the other codes.
282 * The end of the input or output buffer is also handled by the slower loop.
283 * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
284 *
285 * The callback handling is done by returning with an error code.
286 * The conversion framework actually calls the callback function.
287 */
288 if(isSingleByteMode) {
289 /* fast path for single-byte mode */
290 if(state==readCommand) {
291fastSingle:
292 while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
293 ++source;
294 ++nextSourceIndex;
295 if(b<=0x7f) {
296 /* write US-ASCII graphic character or DEL */
297 *target++=(char16_t)b;
298 if(offsets!=nullptr) {
299 *offsets++=sourceIndex;
300 }
301 } else {
302 /* write from dynamic window */
303 uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
304 if(c<=0xffff) {
305 *target++=(char16_t)c;
306 if(offsets!=nullptr) {
307 *offsets++=sourceIndex;
308 }
309 } else {
310 /* output surrogate pair */
311 *target++=(char16_t)(0xd7c0+(c>>10));
312 if(target<targetLimit) {
313 *target++=(char16_t)(0xdc00|(c&0x3ff));
314 if(offsets!=nullptr) {
315 *offsets++=sourceIndex;
316 *offsets++=sourceIndex;
317 }
318 } else {
319 /* target overflow */
320 if(offsets!=nullptr) {
321 *offsets++=sourceIndex;
322 }
323 cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
324 cnv->UCharErrorBufferLength=1;
325 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
326 goto endloop;
327 }
328 }
329 }
330 sourceIndex=nextSourceIndex;
331 }
332 }
333
334 /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
335singleByteMode:
336 while(source<sourceLimit) {
337 if(target>=targetLimit) {
338 /* target is full */
339 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
340 break;
341 }
342 b=*source++;
343 ++nextSourceIndex;
344 switch(state) {
345 case readCommand:
346 /* redundant conditions are commented out */
347 /* here: b<0x20 because otherwise we would be in fastSingle */
348 if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
349 /* CR/LF/TAB/NUL */
350 *target++=(char16_t)b;
351 if(offsets!=nullptr) {
352 *offsets++=sourceIndex;
353 }
354 sourceIndex=nextSourceIndex;
355 goto fastSingle;
356 } else if(SC0<=b) {
357 if(b<=SC7) {
358 dynamicWindow=(int8_t)(b-SC0);
359 sourceIndex=nextSourceIndex;
360 goto fastSingle;
361 } else /* if(SD0<=b && b<=SD7) */ {
362 dynamicWindow=(int8_t)(b-SD0);
363 state=defineOne;
364 }
365 } else if(/* SQ0<=b && */ b<=SQ7) {
366 quoteWindow=(int8_t)(b-SQ0);
367 state=quoteOne;
368 } else if(b==SDX) {
369 state=definePairOne;
370 } else if(b==SQU) {
371 state=quotePairOne;
372 } else if(b==SCU) {
373 sourceIndex=nextSourceIndex;
374 isSingleByteMode=false;
375 goto fastUnicode;
376 } else /* Srs */ {
377 /* callback(illegal) */
378 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
379 cnv->toUBytes[0]=b;
380 cnv->toULength=1;
381 goto endloop;
382 }
383
384 /* store the first byte of a multibyte sequence in toUBytes[] */
385 cnv->toUBytes[0]=b;
386 cnv->toULength=1;
387 break;
388 case quotePairOne:
389 byteOne=b;
390 cnv->toUBytes[1]=b;
391 cnv->toULength=2;
392 state=quotePairTwo;
393 break;
394 case quotePairTwo:
395 *target++=(char16_t)((byteOne<<8)|b);
396 if(offsets!=nullptr) {
397 *offsets++=sourceIndex;
398 }
399 sourceIndex=nextSourceIndex;
400 state=readCommand;
401 goto fastSingle;
402 case quoteOne:
403 if(b<0x80) {
404 /* all static offsets are in the BMP */
405 *target++=(char16_t)(staticOffsets[quoteWindow]+b);
406 if(offsets!=nullptr) {
407 *offsets++=sourceIndex;
408 }
409 } else {
410 /* write from dynamic window */
411 uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
412 if(c<=0xffff) {
413 *target++=(char16_t)c;
414 if(offsets!=nullptr) {
415 *offsets++=sourceIndex;
416 }
417 } else {
418 /* output surrogate pair */
419 *target++=(char16_t)(0xd7c0+(c>>10));
420 if(target<targetLimit) {
421 *target++=(char16_t)(0xdc00|(c&0x3ff));
422 if(offsets!=nullptr) {
423 *offsets++=sourceIndex;
424 *offsets++=sourceIndex;
425 }
426 } else {
427 /* target overflow */
428 if(offsets!=nullptr) {
429 *offsets++=sourceIndex;
430 }
431 cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
432 cnv->UCharErrorBufferLength=1;
433 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
434 goto endloop;
435 }
436 }
437 }
438 sourceIndex=nextSourceIndex;
439 state=readCommand;
440 goto fastSingle;
441 case definePairOne:
442 dynamicWindow=(int8_t)((b>>5)&7);
443 byteOne=(uint8_t)(b&0x1f);
444 cnv->toUBytes[1]=b;
445 cnv->toULength=2;
446 state=definePairTwo;
447 break;
448 case definePairTwo:
449 scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
450 sourceIndex=nextSourceIndex;
451 state=readCommand;
452 goto fastSingle;
453 case defineOne:
454 if(b==0) {
455 /* callback(illegal): Reserved window offset value 0 */
456 cnv->toUBytes[1]=b;
457 cnv->toULength=2;
458 goto endloop;
459 } else if(b<gapThreshold) {
460 scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
461 } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
462 scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
463 } else if(b>=fixedThreshold) {
464 scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
465 } else {
466 /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
467 cnv->toUBytes[1]=b;
468 cnv->toULength=2;
469 goto endloop;
470 }
471 sourceIndex=nextSourceIndex;
472 state=readCommand;
473 goto fastSingle;
474 }
475 }
476 } else {
477 /* fast path for Unicode mode */
478 if(state==readCommand) {
479fastUnicode:
480 while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
481 *target++=(char16_t)((b<<8)|source[1]);
482 if(offsets!=nullptr) {
483 *offsets++=sourceIndex;
484 }
485 sourceIndex=nextSourceIndex;
486 nextSourceIndex+=2;
487 source+=2;
488 }
489 }
490
491 /* normal state machine for Unicode mode */
492/* unicodeByteMode: */
493 while(source<sourceLimit) {
494 if(target>=targetLimit) {
495 /* target is full */
496 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
497 break;
498 }
499 b=*source++;
500 ++nextSourceIndex;
501 switch(state) {
502 case readCommand:
503 if((uint8_t)(b-UC0)>(Urs-UC0)) {
504 byteOne=b;
505 cnv->toUBytes[0]=b;
506 cnv->toULength=1;
507 state=quotePairTwo;
508 } else if(/* UC0<=b && */ b<=UC7) {
509 dynamicWindow=(int8_t)(b-UC0);
510 sourceIndex=nextSourceIndex;
511 isSingleByteMode=true;
512 goto fastSingle;
513 } else if(/* UD0<=b && */ b<=UD7) {
514 dynamicWindow=(int8_t)(b-UD0);
515 isSingleByteMode=true;
516 cnv->toUBytes[0]=b;
517 cnv->toULength=1;
518 state=defineOne;
519 goto singleByteMode;
520 } else if(b==UDX) {
521 isSingleByteMode=true;
522 cnv->toUBytes[0]=b;
523 cnv->toULength=1;
524 state=definePairOne;
525 goto singleByteMode;
526 } else if(b==UQU) {
527 cnv->toUBytes[0]=b;
528 cnv->toULength=1;
529 state=quotePairOne;
530 } else /* Urs */ {
531 /* callback(illegal) */
532 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
533 cnv->toUBytes[0]=b;
534 cnv->toULength=1;
535 goto endloop;
536 }
537 break;
538 case quotePairOne:
539 byteOne=b;
540 cnv->toUBytes[1]=b;
541 cnv->toULength=2;
542 state=quotePairTwo;
543 break;
544 case quotePairTwo:
545 *target++=(char16_t)((byteOne<<8)|b);
546 if(offsets!=nullptr) {
547 *offsets++=sourceIndex;
548 }
549 sourceIndex=nextSourceIndex;
550 state=readCommand;
551 goto fastUnicode;
552 }
553 }
554 }
555endloop:
556
557 /* set the converter state back into UConverter */
558 if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
559 /* reset to deal with the next character */
560 state=readCommand;
561 } else if(state==readCommand) {
562 /* not in a multi-byte sequence, reset toULength */
563 cnv->toULength=0;
564 }
565 scsu->toUIsSingleByteMode=isSingleByteMode;
566 scsu->toUState=state;
567 scsu->toUQuoteWindow=quoteWindow;
568 scsu->toUDynamicWindow=dynamicWindow;
569 scsu->toUByteOne=byteOne;
570
571 /* write back the updated pointers */
572 pArgs->source=(const char *)source;
573 pArgs->target=target;
574 pArgs->offsets=offsets;
575}
576
577/*
578 * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
579 * If a change is made in the original function, then either
580 * change this function the same way or
581 * re-copy the original function and remove the variables
582 * offsets, sourceIndex, and nextSourceIndex.
583 */
584static void U_CALLCONV
585_SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
586 UErrorCode *pErrorCode) {
587 UConverter *cnv;
588 SCSUData *scsu;
589 const uint8_t *source, *sourceLimit;
590 char16_t *target;
591 const char16_t *targetLimit;
592 UBool isSingleByteMode;
593 uint8_t state, byteOne;
594 int8_t quoteWindow, dynamicWindow;
595
596 uint8_t b;
597
598 /* set up the local pointers */
599 cnv=pArgs->converter;
600 scsu=(SCSUData *)cnv->extraInfo;
601
602 source=(const uint8_t *)pArgs->source;
603 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
604 target=pArgs->target;
605 targetLimit=pArgs->targetLimit;
606
607 /* get the state machine state */
608 isSingleByteMode=scsu->toUIsSingleByteMode;
609 state=scsu->toUState;
610 quoteWindow=scsu->toUQuoteWindow;
611 dynamicWindow=scsu->toUDynamicWindow;
612 byteOne=scsu->toUByteOne;
613
614 /*
615 * conversion "loop"
616 *
617 * For performance, this is not a normal C loop.
618 * Instead, there are two code blocks for the two SCSU modes.
619 * The function branches to either one, and a change of the mode is done with a goto to
620 * the other branch.
621 *
622 * Each branch has two conventional loops:
623 * - a fast-path loop for the most common codes in the mode
624 * - a loop for all other codes in the mode
625 * When the fast-path runs into a code that it cannot handle, its loop ends and it
626 * runs into the following loop to handle the other codes.
627 * The end of the input or output buffer is also handled by the slower loop.
628 * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
629 *
630 * The callback handling is done by returning with an error code.
631 * The conversion framework actually calls the callback function.
632 */
633 if(isSingleByteMode) {
634 /* fast path for single-byte mode */
635 if(state==readCommand) {
636fastSingle:
637 while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
638 ++source;
639 if(b<=0x7f) {
640 /* write US-ASCII graphic character or DEL */
641 *target++=(char16_t)b;
642 } else {
643 /* write from dynamic window */
644 uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
645 if(c<=0xffff) {
646 *target++=(char16_t)c;
647 } else {
648 /* output surrogate pair */
649 *target++=(char16_t)(0xd7c0+(c>>10));
650 if(target<targetLimit) {
651 *target++=(char16_t)(0xdc00|(c&0x3ff));
652 } else {
653 /* target overflow */
654 cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
655 cnv->UCharErrorBufferLength=1;
656 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
657 goto endloop;
658 }
659 }
660 }
661 }
662 }
663
664 /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
665singleByteMode:
666 while(source<sourceLimit) {
667 if(target>=targetLimit) {
668 /* target is full */
669 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
670 break;
671 }
672 b=*source++;
673 switch(state) {
674 case readCommand:
675 /* redundant conditions are commented out */
676 /* here: b<0x20 because otherwise we would be in fastSingle */
677 if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
678 /* CR/LF/TAB/NUL */
679 *target++=(char16_t)b;
680 goto fastSingle;
681 } else if(SC0<=b) {
682 if(b<=SC7) {
683 dynamicWindow=(int8_t)(b-SC0);
684 goto fastSingle;
685 } else /* if(SD0<=b && b<=SD7) */ {
686 dynamicWindow=(int8_t)(b-SD0);
687 state=defineOne;
688 }
689 } else if(/* SQ0<=b && */ b<=SQ7) {
690 quoteWindow=(int8_t)(b-SQ0);
691 state=quoteOne;
692 } else if(b==SDX) {
693 state=definePairOne;
694 } else if(b==SQU) {
695 state=quotePairOne;
696 } else if(b==SCU) {
697 isSingleByteMode=false;
698 goto fastUnicode;
699 } else /* Srs */ {
700 /* callback(illegal) */
701 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
702 cnv->toUBytes[0]=b;
703 cnv->toULength=1;
704 goto endloop;
705 }
706
707 /* store the first byte of a multibyte sequence in toUBytes[] */
708 cnv->toUBytes[0]=b;
709 cnv->toULength=1;
710 break;
711 case quotePairOne:
712 byteOne=b;
713 cnv->toUBytes[1]=b;
714 cnv->toULength=2;
715 state=quotePairTwo;
716 break;
717 case quotePairTwo:
718 *target++=(char16_t)((byteOne<<8)|b);
719 state=readCommand;
720 goto fastSingle;
721 case quoteOne:
722 if(b<0x80) {
723 /* all static offsets are in the BMP */
724 *target++=(char16_t)(staticOffsets[quoteWindow]+b);
725 } else {
726 /* write from dynamic window */
727 uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
728 if(c<=0xffff) {
729 *target++=(char16_t)c;
730 } else {
731 /* output surrogate pair */
732 *target++=(char16_t)(0xd7c0+(c>>10));
733 if(target<targetLimit) {
734 *target++=(char16_t)(0xdc00|(c&0x3ff));
735 } else {
736 /* target overflow */
737 cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
738 cnv->UCharErrorBufferLength=1;
739 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
740 goto endloop;
741 }
742 }
743 }
744 state=readCommand;
745 goto fastSingle;
746 case definePairOne:
747 dynamicWindow=(int8_t)((b>>5)&7);
748 byteOne=(uint8_t)(b&0x1f);
749 cnv->toUBytes[1]=b;
750 cnv->toULength=2;
751 state=definePairTwo;
752 break;
753 case definePairTwo:
754 scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
755 state=readCommand;
756 goto fastSingle;
757 case defineOne:
758 if(b==0) {
759 /* callback(illegal): Reserved window offset value 0 */
760 cnv->toUBytes[1]=b;
761 cnv->toULength=2;
762 goto endloop;
763 } else if(b<gapThreshold) {
764 scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
765 } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
766 scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
767 } else if(b>=fixedThreshold) {
768 scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
769 } else {
770 /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
771 cnv->toUBytes[1]=b;
772 cnv->toULength=2;
773 goto endloop;
774 }
775 state=readCommand;
776 goto fastSingle;
777 }
778 }
779 } else {
780 /* fast path for Unicode mode */
781 if(state==readCommand) {
782fastUnicode:
783 while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
784 *target++=(char16_t)((b<<8)|source[1]);
785 source+=2;
786 }
787 }
788
789 /* normal state machine for Unicode mode */
790/* unicodeByteMode: */
791 while(source<sourceLimit) {
792 if(target>=targetLimit) {
793 /* target is full */
794 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
795 break;
796 }
797 b=*source++;
798 switch(state) {
799 case readCommand:
800 if((uint8_t)(b-UC0)>(Urs-UC0)) {
801 byteOne=b;
802 cnv->toUBytes[0]=b;
803 cnv->toULength=1;
804 state=quotePairTwo;
805 } else if(/* UC0<=b && */ b<=UC7) {
806 dynamicWindow=(int8_t)(b-UC0);
807 isSingleByteMode=true;
808 goto fastSingle;
809 } else if(/* UD0<=b && */ b<=UD7) {
810 dynamicWindow=(int8_t)(b-UD0);
811 isSingleByteMode=true;
812 cnv->toUBytes[0]=b;
813 cnv->toULength=1;
814 state=defineOne;
815 goto singleByteMode;
816 } else if(b==UDX) {
817 isSingleByteMode=true;
818 cnv->toUBytes[0]=b;
819 cnv->toULength=1;
820 state=definePairOne;
821 goto singleByteMode;
822 } else if(b==UQU) {
823 cnv->toUBytes[0]=b;
824 cnv->toULength=1;
825 state=quotePairOne;
826 } else /* Urs */ {
827 /* callback(illegal) */
828 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
829 cnv->toUBytes[0]=b;
830 cnv->toULength=1;
831 goto endloop;
832 }
833 break;
834 case quotePairOne:
835 byteOne=b;
836 cnv->toUBytes[1]=b;
837 cnv->toULength=2;
838 state=quotePairTwo;
839 break;
840 case quotePairTwo:
841 *target++=(char16_t)((byteOne<<8)|b);
842 state=readCommand;
843 goto fastUnicode;
844 }
845 }
846 }
847endloop:
848
849 /* set the converter state back into UConverter */
850 if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
851 /* reset to deal with the next character */
852 state=readCommand;
853 } else if(state==readCommand) {
854 /* not in a multi-byte sequence, reset toULength */
855 cnv->toULength=0;
856 }
857 scsu->toUIsSingleByteMode=isSingleByteMode;
858 scsu->toUState=state;
859 scsu->toUQuoteWindow=quoteWindow;
860 scsu->toUDynamicWindow=dynamicWindow;
861 scsu->toUByteOne=byteOne;
862
863 /* write back the updated pointers */
864 pArgs->source=(const char *)source;
865 pArgs->target=target;
866}
867U_CDECL_END}
868/* SCSU-from-Unicode conversion functions ----------------------------------- */
869
870/*
871 * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
872 * reasonable results. The lookahead is minimal.
873 * Many cases are simple:
874 * A character fits directly into the current mode, a dynamic or static window,
875 * or is not compressible. These cases are tested first.
876 * Real compression heuristics are applied to the rest, in code branches for
877 * single/Unicode mode and BMP/supplementary code points.
878 * The heuristics used here are extremely simple.
879 */
880
/*
 * Look up the window that contains the character c.
 *
 * offsets[] holds the start offsets of 8 windows; window i covers the
 * 128 code points offsets[i]..offsets[i]+0x7f.
 * Returns the lowest index of a window containing c, or -1 if none does.
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t window = 0;
    while (window < 8) {
        /* unsigned subtraction: wraps to a large value when c is below the window start */
        const uint32_t delta = c - offsets[window];
        if (delta <= 0x7f) {
            return window;
        }
        ++window;
    }
    return -1;
}
892
893/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
894static UBool
895isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
896 return c<=offset+0x7f &&
897 (c>=offset || (c<=0x7f &&
898 (c>=0x20 || (1UL<<c)&0x2601)));
899 /* binary 0010 0110 0000 0001,
900 check for b==0xd || b==0xa || b==9 || b==0 */
901}
902
903/*
904 * getNextDynamicWindow returns the next dynamic window to be redefined
905 */
906static int8_t
907getNextDynamicWindow(SCSUData *scsu) {
908 int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
909 if(++scsu->nextWindowUseIndex==8) {
910 scsu->nextWindowUseIndex=0;
911 }
912 return window;
913}
914
915/*
916 * useDynamicWindow() adjusts
917 * windowUse[] and nextWindowUseIndex for the algorithm to choose
918 * the next dynamic window to be defined;
919 * a subclass may override it and provide its own algorithm.
920 */
921static void
922useDynamicWindow(SCSUData *scsu, int8_t window) {
923 /*
924 * move the existing window, which just became the most recently used one,
925 * up in windowUse[] to nextWindowUseIndex-1
926 */
927
928 /* first, find the index of the window - backwards to favor the more recently used windows */
929 int i, j;
930
931 i=scsu->nextWindowUseIndex;
932 do {
933 if(--i<0) {
934 i=7;
935 }
936 } while(scsu->windowUse[i]!=window);
937
938 /* now copy each windowUse[i+1] to [i] */
939 j=i+1;
940 if(j==8) {
941 j=0;
942 }
943 while(j!=scsu->nextWindowUseIndex) {
944 scsu->windowUse[i]=scsu->windowUse[j];
945 i=j;
946 if(++j==8) { j=0; }
947 }
948
949 /* finally, set the window into the most recently used index */
950 scsu->windowUse[i]=window;
951}
952
953/*
954 * calculate the offset and the code for a dynamic window that contains the character
955 * takes fixed offsets into account
956 * the offset of the window is stored in the offset variable,
957 * the code is returned
958 *
959 * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code
960 */
961static int
962getDynamicOffset(uint32_t c, uint32_t *pOffset) {
963 int i;
964
965 for(i=0; i<7; ++i) {
966 if (c - fixedOffsets[i] <= 0x7f) {
967 *pOffset=fixedOffsets[i];
968 return 0xf9+i;
969 }
970 }
971
972 if(c<0x80) {
973 /* No dynamic window for US-ASCII. */
974 return -1;
975 } else if(c<0x3400 ||
976 c - 0x10000 < 0x14000 - 0x10000 ||
977 c - 0x1d000 <= 0x1ffff - 0x1d000
978 ) {
979 /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
980 *pOffset=c&0x7fffff80;
981 return static_cast<int>(c >> 7);
982 } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
983 /* For these characters we need to take the gapOffset into account. */
984 *pOffset=c&0x7fffff80;
985 return static_cast<int>((c - gapOffset) >> 7);
986 } else {
987 return -1;
988 }
989}
990U_CDECL_BEGINextern "C" {
991/*
992 * Idea for compression:
993 * - save SCSUData and other state before really starting work
994 * - at endloop, see if compression could be better with just unicode mode
995 * - don't do this if a callback has been called
996 * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
997 * - different buffer handling!
998 *
999 * Drawback or need for corrective handling:
1000 * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
1001 * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
1002 * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
1003 *
1004 * How to achieve both?
1005 * - Only replace the result after an SDX or SCU?
1006 */
1007
1008static void U_CALLCONV
1009_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
1010 UErrorCode *pErrorCode) {
1011 UConverter *cnv;
1012 SCSUData *scsu;
1013 const char16_t *source, *sourceLimit;
1014 uint8_t *target;
1015 int32_t targetCapacity;
1016 int32_t *offsets;
1017
1018 UBool isSingleByteMode;
1019 uint8_t dynamicWindow;
1020 uint32_t currentOffset;
1021
1022 uint32_t c, delta;
1023
1024 int32_t sourceIndex, nextSourceIndex;
1025
1026 int32_t length;
1027
1028 /* variables for compression heuristics */
1029 uint32_t offset;
1030 char16_t lead, trail;
1031 int code;
1032 int8_t window;
1033
1034 /* set up the local pointers */
1035 cnv=pArgs->converter;
1036 scsu=(SCSUData *)cnv->extraInfo;
1037
1038 /* set up the local pointers */
1039 source=pArgs->source;
1040 sourceLimit=pArgs->sourceLimit;
1041 target=(uint8_t *)pArgs->target;
1042 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1043 offsets=pArgs->offsets;
1044
1045 /* get the state machine state */
1046 isSingleByteMode=scsu->fromUIsSingleByteMode;
1047 dynamicWindow=scsu->fromUDynamicWindow;
1048 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1049
1050 c=cnv->fromUChar32;
1051
1052 /* sourceIndex=-1 if the current character began in the previous buffer */
1053 sourceIndex= c==0 ? 0 : -1;
1054 nextSourceIndex=0;
1055
1056 /* similar conversion "loop" as in toUnicode */
1057loop:
1058 if(isSingleByteMode) {
1059 if(c!=0 && targetCapacity>0) {
1060 goto getTrailSingle;
1061 }
1062
1063 /* state machine for single-byte mode */
1064/* singleByteMode: */
1065 while(source<sourceLimit) {
1066 if(targetCapacity<=0) {
1067 /* target is full */
1068 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1069 break;
1070 }
1071 c=*source++;
1072 ++nextSourceIndex;
1073
1074 if((c-0x20)<=0x5f) {
1075 /* pass US-ASCII graphic character through */
1076 *target++=(uint8_t)c;
1077 if(offsets!=nullptr) {
1078 *offsets++=sourceIndex;
1079 }
1080 --targetCapacity;
1081 } else if(c<0x20) {
1082 if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1083 /* CR/LF/TAB/NUL */
1084 *target++=(uint8_t)c;
1085 if(offsets!=nullptr) {
1086 *offsets++=sourceIndex;
1087 }
1088 --targetCapacity;
1089 } else {
1090 /* quote C0 control character */
1091 c|=SQ0<<8;
1092 length=2;
1093 goto outputBytes;
1094 }
1095 } else if((delta=c-currentOffset)<=0x7f) {
1096 /* use the current dynamic window */
1097 *target++=(uint8_t)(delta|0x80);
1098 if(offsets!=nullptr) {
1099 *offsets++=sourceIndex;
1100 }
1101 --targetCapacity;
1102 } else if(U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800)) {
1103 if(U16_IS_SURROGATE_LEAD(c)(((c)&0x400)==0)) {
1104getTrailSingle:
1105 lead=(char16_t)c;
1106 if(source<sourceLimit) {
1107 /* test the following code unit */
1108 trail=*source;
1109 if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
1110 ++source;
1111 ++nextSourceIndex;
1112 c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<<
10UL)+0xdc00-0x10000))
;
1113 /* convert this surrogate code point */
1114 /* exit this condition tree */
1115 } else {
1116 /* this is an unmatched lead code unit (1st surrogate) */
1117 /* callback(illegal) */
1118 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1119 goto endloop;
1120 }
1121 } else {
1122 /* no more input */
1123 break;
1124 }
1125 } else {
1126 /* this is an unmatched trail code unit (2nd surrogate) */
1127 /* callback(illegal) */
1128 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1129 goto endloop;
1130 }
1131
1132 /* compress supplementary character U+10000..U+10ffff */
1133 if((delta=c-currentOffset)<=0x7f) {
1134 /* use the current dynamic window */
1135 *target++=(uint8_t)(delta|0x80);
1136 if(offsets!=nullptr) {
1137 *offsets++=sourceIndex;
1138 }
1139 --targetCapacity;
1140 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1141 /* there is a dynamic window that contains this character, change to it */
1142 dynamicWindow=window;
1143 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1144 useDynamicWindow(scsu, dynamicWindow);
1145 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1146 length=2;
1147 goto outputBytes;
1148 } else if((code=getDynamicOffset(c, &offset))>=0) {
1149 /* might check if there are more characters in this window to come */
1150 /* define an extended window with this character */
1151 code-=0x200;
1152 dynamicWindow=getNextDynamicWindow(scsu);
1153 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1154 useDynamicWindow(scsu, dynamicWindow);
1155 c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1156 length=4;
1157 goto outputBytes;
1158 } else {
1159 /* change to Unicode mode and output this (lead, trail) pair */
1160 isSingleByteMode=false;
1161 *target++=(uint8_t)SCU;
1162 if(offsets!=nullptr) {
1163 *offsets++=sourceIndex;
1164 }
1165 --targetCapacity;
1166 c=((uint32_t)lead<<16)|trail;
1167 length=4;
1168 goto outputBytes;
1169 }
1170 } else if(c<0xa0) {
1171 /* quote C1 control character */
1172 c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1173 length=2;
1174 goto outputBytes;
1175 } else if(c==0xfeff || c>=0xfff0) {
1176 /* quote signature character=byte order mark and specials */
1177 c|=SQU<<16;
1178 length=3;
1179 goto outputBytes;
1180 } else {
1181 /* compress all other BMP characters */
1182 if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1183 /* there is a window defined that contains this character - switch to it or quote from it? */
1184 if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1185 /* change to dynamic window */
1186 dynamicWindow=window;
1187 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1188 useDynamicWindow(scsu, dynamicWindow);
1189 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1190 length=2;
1191 goto outputBytes;
1192 } else {
1193 /* quote from dynamic window */
1194 c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1195 length=2;
1196 goto outputBytes;
1197 }
1198 } else if((window=getWindow(staticOffsets, c))>=0) {
1199 /* quote from static window */
1200 c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1201 length=2;
1202 goto outputBytes;
1203 } else if((code=getDynamicOffset(c, &offset))>=0) {
1204 /* define a dynamic window with this character */
1205 dynamicWindow=getNextDynamicWindow(scsu);
1206 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1207 useDynamicWindow(scsu, dynamicWindow);
1208 c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1209 length=3;
1210 goto outputBytes;
1211 } else if ((c - 0x3400) < (0xd800 - 0x3400) &&
1212 (source >= sourceLimit || (uint32_t)(*source - 0x3400) < (0xd800 - 0x3400))
1213 ) {
1214 /*
1215 * this character is not compressible (a BMP ideograph or similar);
1216 * switch to Unicode mode if this is the last character in the block
1217 * or there is at least one more ideograph following immediately
1218 */
1219 isSingleByteMode=false;
1220 c|=SCU<<16;
1221 length=3;
1222 goto outputBytes;
1223 } else {
1224 /* quote Unicode */
1225 c|=SQU<<16;
1226 length=3;
1227 goto outputBytes;
1228 }
1229 }
1230
1231 /* normal end of conversion: prepare for a new character */
1232 c=0;
1233 sourceIndex=nextSourceIndex;
1234 }
1235 } else {
1236 if(c!=0 && targetCapacity>0) {
1237 goto getTrailUnicode;
1238 }
1239
1240 /* state machine for Unicode mode */
1241/* unicodeByteMode: */
1242 while(source<sourceLimit) {
1243 if(targetCapacity<=0) {
1244 /* target is full */
1245 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1246 break;
1247 }
1248 c=*source++;
1249 ++nextSourceIndex;
1250
1251 if ((c - 0x3400) < (0xd800 - 0x3400)) {
1252 /* not compressible, write character directly */
1253 if(targetCapacity>=2) {
1254 *target++=(uint8_t)(c>>8);
1255 *target++=(uint8_t)c;
1256 if(offsets!=nullptr) {
1257 *offsets++=sourceIndex;
1258 *offsets++=sourceIndex;
1259 }
1260 targetCapacity-=2;
1261 } else {
1262 length=2;
1263 goto outputBytes;
1264 }
1265 } else if (c - 0x3400 >= 0xf300 - 0x3400 /* c<0x3400 || c>=0xf300 */) {
1266 /* compress BMP character if the following one is not an uncompressible ideograph */
1267 if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1268 if (c - 0x30 < 10 || c - 0x61 < 26 || c - 0x41 < 26) {
1269 /* ASCII digit or letter */
1270 isSingleByteMode=true;
1271 c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1272 length=2;
1273 goto outputBytes;
1274 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1275 /* there is a dynamic window that contains this character, change to it */
1276 isSingleByteMode=true;
1277 dynamicWindow=window;
1278 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1279 useDynamicWindow(scsu, dynamicWindow);
1280 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1281 length=2;
1282 goto outputBytes;
1283 } else if((code=getDynamicOffset(c, &offset))>=0) {
1284 /* define a dynamic window with this character */
1285 isSingleByteMode=true;
1286 dynamicWindow=getNextDynamicWindow(scsu);
1287 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1288 useDynamicWindow(scsu, dynamicWindow);
1289 c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1290 length=3;
1291 goto outputBytes;
1292 }
1293 }
1294
1295 /* don't know how to compress this character, just write it directly */
1296 length=2;
1297 goto outputBytes;
1298 } else if(c<0xe000) {
1299 /* c is a surrogate */
1300 if(U16_IS_SURROGATE_LEAD(c)(((c)&0x400)==0)) {
1301getTrailUnicode:
1302 lead=(char16_t)c;
1303 if(source<sourceLimit) {
1304 /* test the following code unit */
1305 trail=*source;
1306 if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
1307 ++source;
1308 ++nextSourceIndex;
1309 c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<<
10UL)+0xdc00-0x10000))
;
1310 /* convert this surrogate code point */
1311 /* exit this condition tree */
1312 } else {
1313 /* this is an unmatched lead code unit (1st surrogate) */
1314 /* callback(illegal) */
1315 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1316 goto endloop;
1317 }
1318 } else {
1319 /* no more input */
1320 break;
1321 }
1322 } else {
1323 /* this is an unmatched trail code unit (2nd surrogate) */
1324 /* callback(illegal) */
1325 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1326 goto endloop;
1327 }
1328
1329 /* compress supplementary character */
1330 if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1331 !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1332 ) {
1333 /*
1334 * there is a dynamic window that contains this character and
1335 * the following character is not uncompressible,
1336 * change to the window
1337 */
1338 isSingleByteMode=true;
1339 dynamicWindow=window;
1340 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1341 useDynamicWindow(scsu, dynamicWindow);
1342 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1343 length=2;
1344 goto outputBytes;
1345 } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1346 (code=getDynamicOffset(c, &offset))>=0
1347 ) {
1348 /* two supplementary characters in (probably) the same window - define an extended one */
1349 isSingleByteMode=true;
1350 code-=0x200;
1351 dynamicWindow=getNextDynamicWindow(scsu);
1352 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1353 useDynamicWindow(scsu, dynamicWindow);
1354 c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1355 length=4;
1356 goto outputBytes;
1357 } else {
1358 /* don't know how to compress this character, just write it directly */
1359 c=((uint32_t)lead<<16)|trail;
1360 length=4;
1361 goto outputBytes;
1362 }
1363 } else /* 0xe000<=c<0xf300 */ {
1364 /* quote to avoid SCSU tags */
1365 c|=UQU<<16;
1366 length=3;
1367 goto outputBytes;
1368 }
1369
1370 /* normal end of conversion: prepare for a new character */
1371 c=0;
1372 sourceIndex=nextSourceIndex;
1373 }
1374 }
1375endloop:
1376
1377 /* set the converter state back into UConverter */
1378 scsu->fromUIsSingleByteMode=isSingleByteMode;
1379 scsu->fromUDynamicWindow=dynamicWindow;
1380
1381 cnv->fromUChar32=c;
1382
1383 /* write back the updated pointers */
1384 pArgs->source=source;
1385 pArgs->target=(char *)target;
1386 pArgs->offsets=offsets;
1387 return;
1388
1389outputBytes:
1390 /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1391 /* from the first if in the loop we know that targetCapacity>0 */
1392 if(length<=targetCapacity) {
1393 if(offsets==nullptr) {
1394 switch(length) {
1395 /* each branch falls through to the next one */
1396 case 4:
1397 *target++=(uint8_t)(c>>24);
1398 U_FALLTHROUGH[[clang::fallthrough]];
1399 case 3:
1400 *target++=(uint8_t)(c>>16);
1401 U_FALLTHROUGH[[clang::fallthrough]];
1402 case 2:
1403 *target++=(uint8_t)(c>>8);
1404 U_FALLTHROUGH[[clang::fallthrough]];
1405 case 1:
1406 *target++=(uint8_t)c;
1407 U_FALLTHROUGH[[clang::fallthrough]];
1408 default:
1409 /* will never occur */
1410 break;
1411 }
1412 } else {
1413 switch(length) {
1414 /* each branch falls through to the next one */
1415 case 4:
1416 *target++=(uint8_t)(c>>24);
1417 *offsets++=sourceIndex;
1418 U_FALLTHROUGH[[clang::fallthrough]];
1419 case 3:
1420 *target++=(uint8_t)(c>>16);
1421 *offsets++=sourceIndex;
1422 U_FALLTHROUGH[[clang::fallthrough]];
1423 case 2:
1424 *target++=(uint8_t)(c>>8);
1425 *offsets++=sourceIndex;
1426 U_FALLTHROUGH[[clang::fallthrough]];
1427 case 1:
1428 *target++=(uint8_t)c;
1429 *offsets++=sourceIndex;
1430 U_FALLTHROUGH[[clang::fallthrough]];
1431 default:
1432 /* will never occur */
1433 break;
1434 }
1435 }
1436 targetCapacity-=length;
1437
1438 /* normal end of conversion: prepare for a new character */
1439 c=0;
1440 sourceIndex=nextSourceIndex;
1441 goto loop;
1442 } else {
1443 uint8_t *p;
1444
1445 /*
1446 * We actually do this backwards here:
1447 * In order to save an intermediate variable, we output
1448 * first to the overflow buffer what does not fit into the
1449 * regular target.
1450 */
1451 /* we know that 0<=targetCapacity<length<=4 */
1452 /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
1453 length-=targetCapacity;
1454 p=(uint8_t *)cnv->charErrorBuffer;
1455 switch(length) {
1456 /* each branch falls through to the next one */
1457 case 4:
1458 *p++=(uint8_t)(c>>24);
1459 U_FALLTHROUGH[[clang::fallthrough]];
1460 case 3:
1461 *p++=(uint8_t)(c>>16);
1462 U_FALLTHROUGH[[clang::fallthrough]];
1463 case 2:
1464 *p++=(uint8_t)(c>>8);
1465 U_FALLTHROUGH[[clang::fallthrough]];
1466 case 1:
1467 *p=(uint8_t)c;
1468 U_FALLTHROUGH[[clang::fallthrough]];
1469 default:
1470 /* will never occur */
1471 break;
1472 }
1473 cnv->charErrorBufferLength=(int8_t)length;
1474
1475 /* now output what fits into the regular target */
1476 c>>=8*length; /* length was reduced by targetCapacity */
1477 switch(targetCapacity) {
1478 /* each branch falls through to the next one */
1479 case 3:
1480 *target++=(uint8_t)(c>>16);
1481 if(offsets!=nullptr) {
1482 *offsets++=sourceIndex;
1483 }
1484 U_FALLTHROUGH[[clang::fallthrough]];
1485 case 2:
1486 *target++=(uint8_t)(c>>8);
1487 if(offsets!=nullptr) {
1488 *offsets++=sourceIndex;
1489 }
1490 U_FALLTHROUGH[[clang::fallthrough]];
1491 case 1:
1492 *target++=(uint8_t)c;
1493 if(offsets!=nullptr) {
1494 *offsets++=sourceIndex;
1495 }
1496 U_FALLTHROUGH[[clang::fallthrough]];
1497 default:
1498 break;
1499 }
1500
1501 /* target overflow */
1502 targetCapacity=0;
1503 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1504 c=0;
1505 goto endloop;
1506 }
1507}
1508
1509/*
1510 * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
1511 * If a change is made in the original function, then either
1512 * change this function the same way or
1513 * re-copy the original function and remove the variables
1514 * offsets, sourceIndex, and nextSourceIndex.
1515 */
1516static void U_CALLCONV
1517_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
1518 UErrorCode *pErrorCode) {
1519 UConverter *cnv;
1520 SCSUData *scsu;
1521 const char16_t *source, *sourceLimit;
1522 uint8_t *target;
1523 int32_t targetCapacity;
1524
1525 UBool isSingleByteMode;
1526 uint8_t dynamicWindow;
1527 uint32_t currentOffset;
1528
1529 uint32_t c, delta;
1530
1531 int32_t length;
1532
1533 /* variables for compression heuristics */
1534 uint32_t offset;
1535 char16_t lead, trail;
1536 int code;
1537 int8_t window;
1538
1539 /* set up the local pointers */
1540 cnv=pArgs->converter;
1541 scsu=(SCSUData *)cnv->extraInfo;
1542
1543 /* set up the local pointers */
1544 source=pArgs->source;
1545 sourceLimit=pArgs->sourceLimit;
1546 target=(uint8_t *)pArgs->target;
1547 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1548
1549 /* get the state machine state */
1550 isSingleByteMode=scsu->fromUIsSingleByteMode;
1551 dynamicWindow=scsu->fromUDynamicWindow;
1552 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1553
1554 c=cnv->fromUChar32;
1555
1556 /* similar conversion "loop" as in toUnicode */
1557loop:
1558 if(isSingleByteMode) {
1559 if(c!=0 && targetCapacity>0) {
1560 goto getTrailSingle;
1561 }
1562
1563 /* state machine for single-byte mode */
1564/* singleByteMode: */
1565 while(source<sourceLimit) {
1566 if(targetCapacity<=0) {
1567 /* target is full */
1568 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1569 break;
1570 }
1571 c=*source++;
1572
1573 if((c-0x20)<=0x5f) {
1574 /* pass US-ASCII graphic character through */
1575 *target++=(uint8_t)c;
1576 --targetCapacity;
1577 } else if(c<0x20) {
1578 if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1579 /* CR/LF/TAB/NUL */
1580 *target++=(uint8_t)c;
1581 --targetCapacity;
1582 } else {
1583 /* quote C0 control character */
1584 c|=SQ0<<8;
1585 length=2;
1586 goto outputBytes;
1587 }
1588 } else if((delta=c-currentOffset)<=0x7f) {
1589 /* use the current dynamic window */
1590 *target++=(uint8_t)(delta|0x80);
1591 --targetCapacity;
1592 } else if(U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800)) {
1593 if(U16_IS_SURROGATE_LEAD(c)(((c)&0x400)==0)) {
1594getTrailSingle:
1595 lead=(char16_t)c;
1596 if(source<sourceLimit) {
1597 /* test the following code unit */
1598 trail=*source;
1599 if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
1600 ++source;
1601 c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<<
10UL)+0xdc00-0x10000))
;
1602 /* convert this surrogate code point */
1603 /* exit this condition tree */
1604 } else {
1605 /* this is an unmatched lead code unit (1st surrogate) */
1606 /* callback(illegal) */
1607 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1608 goto endloop;
1609 }
1610 } else {
1611 /* no more input */
1612 break;
1613 }
1614 } else {
1615 /* this is an unmatched trail code unit (2nd surrogate) */
1616 /* callback(illegal) */
1617 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1618 goto endloop;
1619 }
1620
1621 /* compress supplementary character U+10000..U+10ffff */
1622 if((delta=c-currentOffset)<=0x7f) {
1623 /* use the current dynamic window */
1624 *target++=(uint8_t)(delta|0x80);
1625 --targetCapacity;
1626 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1627 /* there is a dynamic window that contains this character, change to it */
1628 dynamicWindow=window;
1629 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1630 useDynamicWindow(scsu, dynamicWindow);
1631 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1632 length=2;
1633 goto outputBytes;
1634 } else if((code=getDynamicOffset(c, &offset))>=0) {
1635 /* might check if there are more characters in this window to come */
1636 /* define an extended window with this character */
1637 code-=0x200;
1638 dynamicWindow=getNextDynamicWindow(scsu);
1639 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1640 useDynamicWindow(scsu, dynamicWindow);
1641 c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1642 length=4;
1643 goto outputBytes;
1644 } else {
1645 /* change to Unicode mode and output this (lead, trail) pair */
1646 isSingleByteMode=false;
1647 *target++=(uint8_t)SCU;
1648 --targetCapacity;
1649 c=((uint32_t)lead<<16)|trail;
1650 length=4;
1651 goto outputBytes;
1652 }
1653 } else if(c<0xa0) {
1654 /* quote C1 control character */
1655 c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1656 length=2;
1657 goto outputBytes;
1658 } else if(c==0xfeff || c>=0xfff0) {
1659 /* quote signature character=byte order mark and specials */
1660 c|=SQU<<16;
1661 length=3;
1662 goto outputBytes;
1663 } else {
1664 /* compress all other BMP characters */
1665 if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1666 /* there is a window defined that contains this character - switch to it or quote from it? */
1667 if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1668 /* change to dynamic window */
1669 dynamicWindow=window;
1670 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1671 useDynamicWindow(scsu, dynamicWindow);
1672 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1673 length=2;
1674 goto outputBytes;
1675 } else {
1676 /* quote from dynamic window */
1677 c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1678 length=2;
1679 goto outputBytes;
1680 }
1681 } else if((window=getWindow(staticOffsets, c))>=0) {
1682 /* quote from static window */
1683 c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1684 length=2;
1685 goto outputBytes;
1686 } else if((code=getDynamicOffset(c, &offset))>=0) {
1687 /* define a dynamic window with this character */
1688 dynamicWindow=getNextDynamicWindow(scsu);
1689 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1690 useDynamicWindow(scsu, dynamicWindow);
1691 c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1692 length=3;
1693 goto outputBytes;
1694 } else if (c - 0x3400 < 0xd800 - 0x3400 &&
1695 (source >= sourceLimit || static_cast<uint32_t>(*source - 0x3400) < 0xd800 - 0x3400)
1696 ) {
1697 /*
1698 * this character is not compressible (a BMP ideograph or similar);
1699 * switch to Unicode mode if this is the last character in the block
1700 * or there is at least one more ideograph following immediately
1701 */
1702 isSingleByteMode=false;
1703 c|=SCU<<16;
1704 length=3;
1705 goto outputBytes;
1706 } else {
1707 /* quote Unicode */
1708 c|=SQU<<16;
1709 length=3;
1710 goto outputBytes;
1711 }
1712 }
1713
1714 /* normal end of conversion: prepare for a new character */
1715 c=0;
1716 }
1717 } else {
1718 if(c!=0 && targetCapacity>0) {
1719 goto getTrailUnicode;
1720 }
1721
1722 /* state machine for Unicode mode */
1723/* unicodeByteMode: */
1724 while(source<sourceLimit) {
1725 if(targetCapacity<=0) {
1726 /* target is full */
1727 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1728 break;
1729 }
1730 c=*source++;
1731
1732 if (c - 0x3400 < 0xd800 - 0x3400) {
1733 /* not compressible, write character directly */
1734 if(targetCapacity>=2) {
1735 *target++=(uint8_t)(c>>8);
1736 *target++=(uint8_t)c;
1737 targetCapacity-=2;
1738 } else {
1739 length=2;
1740 goto outputBytes;
1741 }
1742 } else if (c - 0x3400 >= 0xf300 - 0x3400 /* c<0x3400 || c>=0xf300 */) {
1743 /* compress BMP character if the following one is not an uncompressible ideograph */
1744 if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1745 if (c - 0x30 < 10 || c - 0x61 < 26 || c - 0x41 < 26) {
1746 /* ASCII digit or letter */
1747 isSingleByteMode=true;
1748 c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1749 length=2;
1750 goto outputBytes;
1751 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1752 /* there is a dynamic window that contains this character, change to it */
1753 isSingleByteMode=true;
1754 dynamicWindow=window;
1755 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1756 useDynamicWindow(scsu, dynamicWindow);
1757 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1758 length=2;
1759 goto outputBytes;
1760 } else if((code=getDynamicOffset(c, &offset))>=0) {
1761 /* define a dynamic window with this character */
1762 isSingleByteMode=true;
1763 dynamicWindow=getNextDynamicWindow(scsu);
1764 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1765 useDynamicWindow(scsu, dynamicWindow);
1766 c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1767 length=3;
1768 goto outputBytes;
1769 }
1770 }
1771
1772 /* don't know how to compress this character, just write it directly */
1773 length=2;
1774 goto outputBytes;
1775 } else if(c<0xe000) {
1776 /* c is a surrogate */
1777 if(U16_IS_SURROGATE_LEAD(c)(((c)&0x400)==0)) {
1778getTrailUnicode:
1779 lead=(char16_t)c;
1780 if(source<sourceLimit) {
1781 /* test the following code unit */
1782 trail=*source;
1783 if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
1784 ++source;
1785 c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<<
10UL)+0xdc00-0x10000))
;
1786 /* convert this surrogate code point */
1787 /* exit this condition tree */
1788 } else {
1789 /* this is an unmatched lead code unit (1st surrogate) */
1790 /* callback(illegal) */
1791 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1792 goto endloop;
1793 }
1794 } else {
1795 /* no more input */
1796 break;
1797 }
1798 } else {
1799 /* this is an unmatched trail code unit (2nd surrogate) */
1800 /* callback(illegal) */
1801 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1802 goto endloop;
1803 }
1804
1805 /* compress supplementary character */
1806 if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1807 !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1808 ) {
1809 /*
1810 * there is a dynamic window that contains this character and
1811 * the following character is not uncompressible,
1812 * change to the window
1813 */
1814 isSingleByteMode=true;
1815 dynamicWindow=window;
1816 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1817 useDynamicWindow(scsu, dynamicWindow);
1818 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1819 length=2;
1820 goto outputBytes;
1821 } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1822 (code=getDynamicOffset(c, &offset))>=0
1823 ) {
1824 /* two supplementary characters in (probably) the same window - define an extended one */
1825 isSingleByteMode=true;
1826 code-=0x200;
1827 dynamicWindow=getNextDynamicWindow(scsu);
1828 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1829 useDynamicWindow(scsu, dynamicWindow);
1830 c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1831 length=4;
1832 goto outputBytes;
1833 } else {
1834 /* don't know how to compress this character, just write it directly */
1835 c=((uint32_t)lead<<16)|trail;
1836 length=4;
1837 goto outputBytes;
1838 }
1839 } else /* 0xe000<=c<0xf300 */ {
1840 /* quote to avoid SCSU tags */
1841 c|=UQU<<16;
1842 length=3;
1843 goto outputBytes;
1844 }
1845
1846 /* normal end of conversion: prepare for a new character */
1847 c=0;
1848 }
1849 }
1850endloop:
1851
1852 /* set the converter state back into UConverter */
1853 scsu->fromUIsSingleByteMode=isSingleByteMode;
1854 scsu->fromUDynamicWindow=dynamicWindow;
1855
1856 cnv->fromUChar32=c;
1857
1858 /* write back the updated pointers */
1859 pArgs->source=source;
1860 pArgs->target=(char *)target;
1861 return;
1862
1863outputBytes:
1864 /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1865 /* from the first if in the loop we know that targetCapacity>0 */
1866 if(length<=targetCapacity) {
1867 switch(length) {
1868 /* each branch falls through to the next one */
1869 case 4:
1870 *target++=(uint8_t)(c>>24);
1871 U_FALLTHROUGH[[clang::fallthrough]];
1872 case 3:
1873 *target++=(uint8_t)(c>>16);
1874 U_FALLTHROUGH[[clang::fallthrough]];
1875 case 2:
1876 *target++=(uint8_t)(c>>8);
1877 U_FALLTHROUGH[[clang::fallthrough]];
1878 case 1:
1879 *target++=(uint8_t)c;
1880 U_FALLTHROUGH[[clang::fallthrough]];
1881 default:
1882 /* will never occur */
1883 break;
1884 }
1885 targetCapacity-=length;
1886
1887 /* normal end of conversion: prepare for a new character */
1888 c=0;
1889 goto loop;
1890 } else {
1891 uint8_t *p;
1892
1893 /*
1894 * We actually do this backwards here:
1895 * In order to save an intermediate variable, we output
1896 * first to the overflow buffer what does not fit into the
1897 * regular target.
1898 */
1899 /* we know that 0<=targetCapacity<length<=4 */
1900 /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
1901 length-=targetCapacity;
1902 p=(uint8_t *)cnv->charErrorBuffer;
1903 switch(length) {
1904 /* each branch falls through to the next one */
1905 case 4:
1906 *p++=(uint8_t)(c>>24);
1907 U_FALLTHROUGH[[clang::fallthrough]];
1908 case 3:
1909 *p++=(uint8_t)(c>>16);
1910 U_FALLTHROUGH[[clang::fallthrough]];
1911 case 2:
1912 *p++=(uint8_t)(c>>8);
1913 U_FALLTHROUGH[[clang::fallthrough]];
1914 case 1:
1915 *p=(uint8_t)c;
1916 U_FALLTHROUGH[[clang::fallthrough]];
1917 default:
1918 /* will never occur */
1919 break;
1920 }
1921 cnv->charErrorBufferLength=(int8_t)length;
1922
1923 /* now output what fits into the regular target */
1924 c = (length == 4) ? 0 : c >> 8*length; /* length was reduced by targetCapacity */
1925 switch(targetCapacity) {
1926 /* each branch falls through to the next one */
1927 case 3:
1928 *target++=(uint8_t)(c>>16);
1929 U_FALLTHROUGH[[clang::fallthrough]];
1930 case 2:
1931 *target++=(uint8_t)(c>>8);
1932 U_FALLTHROUGH[[clang::fallthrough]];
1933 case 1:
1934 *target++=(uint8_t)c;
1935 U_FALLTHROUGH[[clang::fallthrough]];
1936 default:
1937 break;
1938 }
1939
1940 /* target overflow */
1941 targetCapacity=0;
Value stored to 'targetCapacity' is never read
1942 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1943 c=0;
1944 goto endloop;
1945 }
1946}
1947
1948/* miscellaneous ------------------------------------------------------------ */
1949
1950static const char * U_CALLCONV
1951_SCSUGetName(const UConverter *cnv) {
1952 SCSUData *scsu=(SCSUData *)cnv->extraInfo;
1953
1954 switch(scsu->locale) {
1955 case l_ja:
1956 return "SCSU,locale=ja";
1957 default:
1958 return "SCSU";
1959 }
1960}
1961
1962/* structure for SafeClone calculations */
1963struct cloneSCSUStruct
1964{
1965 UConverter cnv;
1966 SCSUData mydata;
1967};
1968
1969static UConverter * U_CALLCONV
1970_SCSUSafeClone(const UConverter *cnv,
1971 void *stackBuffer,
1972 int32_t *pBufferSize,
1973 UErrorCode *status)
1974{
1975 struct cloneSCSUStruct * localClone;
1976 int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
1977
1978 if (U_FAILURE(*status)){
1979 return nullptr;
1980 }
1981
1982 if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
1983 *pBufferSize = bufferSizeNeeded;
1984 return nullptr;
1985 }
1986
1987 localClone = (struct cloneSCSUStruct *)stackBuffer;
1988 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1989
1990 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData))do { clang diagnostic push clang diagnostic ignored "-Waddress"
(static_cast <bool> (&localClone->mydata != __null
) ? void (0) : __assert_fail ("&localClone->mydata != __null"
, __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__
)); (static_cast <bool> (cnv->extraInfo != __null) ?
void (0) : __assert_fail ("cnv->extraInfo != __null", __builtin_FILE
(), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); clang
diagnostic pop :: memcpy(&localClone->mydata, cnv->
extraInfo, sizeof(SCSUData)); } while (false)
;
1991 localClone->cnv.extraInfo = &localClone->mydata;
1992 localClone->cnv.isExtraLocal = true;
1993
1994 return &localClone->cnv;
1995}
1996U_CDECL_END}
1997
1998static const UConverterImpl _SCSUImpl={
1999 UCNV_SCSU,
2000
2001 nullptr,
2002 nullptr,
2003
2004 _SCSUOpen,
2005 _SCSUClose,
2006 _SCSUReset,
2007
2008 _SCSUToUnicode,
2009 _SCSUToUnicodeWithOffsets,
2010 _SCSUFromUnicode,
2011 _SCSUFromUnicodeWithOffsets,
2012 nullptr,
2013
2014 nullptr,
2015 _SCSUGetName,
2016 nullptr,
2017 _SCSUSafeClone,
2018 ucnv_getCompleteUnicodeSetucnv_getCompleteUnicodeSet_77,
2019 nullptr,
2020 nullptr
2021};
2022
2023static const UConverterStaticData _SCSUStaticData={
2024 sizeof(UConverterStaticData),
2025 "SCSU",
2026 1212, /* CCSID for SCSU */
2027 UCNV_IBM, UCNV_SCSU,
2028 1, 3, /* one char16_t generates at least 1 byte and at most 3 bytes */
2029 /*
2030 * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
2031 * substitution string.
2032 */
2033 { 0x0e, 0xff, 0xfd, 0 }, 3,
2034 false, false,
2035 0,
2036 0,
2037 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
2038};
2039
2040const UConverterSharedData _SCSUData_SCSUData_77=
2041 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl){ sizeof(UConverterSharedData), ~((uint32_t)0), __null, &
_SCSUStaticData, false, false, &_SCSUImpl, 0, { 0, 0, 0, 0
, __null, __null, __null, __null, __null, __null, { 0 }, __null
, __null, 0, 0, 0, false, 0, 0, __null, __null, __null, __null
} }
;
2042
2043#endif