Bug Summary

File:root/firefox-clang/intl/icu/source/i18n/nfrs.cpp
Warning:line 117, column 14
Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name nfrs.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/i18n -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/i18n -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/config/gcc_hidden.h -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -D U_I18N_IMPLEMENTATION -D _LIBCPP_DISABLE_DEPRECATION_WARNINGS -D U_USING_ICU_NAMESPACE=0 -D U_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -D U_HIDE_OBSOLETE_UTF_OLD_H=1 -D UCONFIG_NO_LEGACY_CONVERSION -D UCONFIG_NO_TRANSLITERATION -D UCONFIG_NO_REGULAR_EXPRESSIONS -D UCONFIG_NO_BREAK_ITERATION -D UCONFIG_NO_IDNA -D UCONFIG_NO_MF2 -D U_CHARSET_IS_UTF8 -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D U_ENABLE_DYLOAD=0 -D U_DEBUG=1 -I /root/firefox-clang/config/external/icu/i18n -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/config/external/icu/i18n -I /root/firefox-clang/intl/icu/source/common -I /root/firefox-clang/mfbt/double-conversion -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/x86_64-linux-gnu/c++/14 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/backward -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=pessimizing-move -Wno-error=large-by-value-copy=128 -Wno-error=implicit-int-float-conversion -Wno-error=thread-safety-analysis -Wno-error=tautological-type-limit-compare -Wno-invalid-offsetof -Wno-range-loop-analysis -Wno-deprecated-anon-enum-enum-conversion -Wno-deprecated-enum-enum-conversion -Wno-deprecated-this-capture -Wno-inline-new-delete -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-vla-cxx-extension -Wno-unknown-warning-option -Wno-comma -Wno-implicit-const-int-float-conversion -Wno-macro-redefined -Wno-microsoft-include -Wno-tautological-unsigned-enum-zero-compare -Wno-unreachable-code-loop-increment -Wno-unreachable-code-return -fdeprecated-macro -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fno-sized-deallocation -fno-aligned-allocation -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-27-100320-3286336-1 -x c++ /root/firefox-clang/intl/icu/source/i18n/nfrs.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5* Copyright (C) 1997-2015, International Business Machines
6* Corporation and others. All Rights Reserved.
7******************************************************************************
8* file name: nfrs.cpp
9* encoding: UTF-8
10* tab size: 8 (not used)
11* indentation:4
12*
13* Modification history
14* Date Name Comments
15* 10/11/2001 Doug Ported from ICU4J
16*/
17
18#include "nfrs.h"
19
20#if U_HAVE_RBNF1
21
22#include "unicode/uchar.h"
23#include "nfrule.h"
24#include "nfrlist.h"
25#include "patternprops.h"
26#include "putilimp.h"
27
28#ifdef RBNF_DEBUG
29#include "cmemory.h"
30#endif
31
32enum {
33 /** -x */
34 NEGATIVE_RULE_INDEX = 0,
35 /** x.x */
36 IMPROPER_FRACTION_RULE_INDEX = 1,
37 /** 0.x */
38 PROPER_FRACTION_RULE_INDEX = 2,
39 /** x.0 */
40 DEFAULT_RULE_INDEX = 3,
41 /** Inf */
42 INFINITY_RULE_INDEX = 4,
43 /** NaN */
44 NAN_RULE_INDEX = 5,
45 NON_NUMERICAL_RULE_LENGTH = 6
46};
47
48U_NAMESPACE_BEGINnamespace icu_77 {
49
50#if 0
51// euclid's algorithm works with doubles
52// note, doubles only get us up to one quadrillion or so, which
53// isn't as much range as we get with longs. We probably still
54// want either 64-bit math, or BigInteger.
55
56static int64_t
57util_lcm(int64_t x, int64_t y)
58{
59 x.abs();
60 y.abs();
61
62 if (x == 0 || y == 0) {
63 return 0;
64 } else {
65 do {
66 if (x < y) {
67 int64_t t = x; x = y; y = t;
68 }
69 x -= y * (x/y);
70 } while (x != 0);
71
72 return y;
73 }
74}
75
76#else
77/**
78 * Calculates the least common multiple of x and y.
79 */
80static int64_t
81util_lcm(int64_t x, int64_t y)
82{
83 // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
84 // vol. 2, 1st ed., pp. 298-299
85 int64_t x1 = x;
86 int64_t y1 = y;
87
88 int p2 = 0;
89 while ((x1 & 1) == 0 && (y1 & 1) == 0) {
10
Assuming the condition is true
11
Assuming the condition is false
12
Loop condition is false. Execution continues on line 95
90 ++p2;
91 x1 >>= 1;
92 y1 >>= 1;
93 }
94
95 int64_t t;
96 if ((x1 & 1) == 1) {
13
Taking false branch
97 t = -y1;
98 } else {
99 t = x1;
100 }
101
102 while (t != 0) {
14
Assuming 't' is equal to 0
15
Loop condition is false. Execution continues on line 114
103 while ((t & 1) == 0) {
104 t = t >> 1;
105 }
106 if (t > 0) {
107 x1 = t;
108 } else {
109 y1 = -t;
110 }
111 t = x1 - y1;
112 }
113
114 int64_t gcd = x1 << p2;
16
'gcd' initialized to 0
115
116 // x * y == gcd(x, y) * lcm(x, y)
117 return x / gcd * y;
17
Division by zero
118}
119#endif
120
121static const char16_t gPercent = 0x0025;
122static const char16_t gColon = 0x003a;
123static const char16_t gSemicolon = 0x003b;
124static const char16_t gLineFeed = 0x000a;
125
126static const char16_t gPercentPercent[] =
127{
128 0x25, 0x25, 0
129}; /* "%%" */
130
131static const char16_t gNoparse[] =
132{
133 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
134}; /* "@noparse" */
135
136NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
137 : name()
138 , rules(0)
139 , owner(_owner)
140 , fractionRules()
141 , fIsFractionRuleSet(false)
142 , fIsPublic(false)
143 , fIsParseable(true)
144{
145 for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
146 nonNumericalRules[i] = nullptr;
147 }
148
149 if (U_FAILURE(status)) {
150 return;
151 }
152
153 UnicodeString& description = descriptions[index]; // !!! make sure index is valid
154
155 if (description.isEmpty()) {
156 // throw new IllegalArgumentException("Empty rule set description");
157 status = U_PARSE_ERROR;
158 return;
159 }
160
161 // if the description begins with a rule set name (the rule set
162 // name can be omitted in formatter descriptions that consist
163 // of only one rule set), copy it out into our "name" member
164 // and delete it from the description
165 if (description.charAt(0) == gPercent) {
166 int32_t pos = description.indexOf(gColon);
167 if (pos == -1) {
168 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
169 status = U_PARSE_ERROR;
170 } else {
171 name.setTo(description, 0, pos);
172 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
173 }
174 description.remove(0, pos);
175 }
176 } else {
177 name.setTo(UNICODE_STRING_SIMPLE("%default")icu::UnicodeString(true, u"%default", -1));
178 }
179
180 if (description.isEmpty()) {
181 // throw new IllegalArgumentException("Empty rule set description");
182 status = U_PARSE_ERROR;
183 }
184
185 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
186
187 if (name.endsWith(gNoparse, 8)) {
188 fIsParseable = false;
189 name.truncate(name.length() - 8); // remove the @noparse from the name
190 }
191
192 // all of the other members of NFRuleSet are initialized
193 // by parseRules()
194}
195
196void
197NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
198{
199 // start by creating a Vector whose elements are Strings containing
200 // the descriptions of the rules (one rule per element). The rules
201 // are separated by semicolons (there's no escape facility: ALL
202 // semicolons are rule delimiters)
203
204 if (U_FAILURE(status)) {
205 return;
206 }
207
208 // ensure we are starting with an empty rule list
209 rules.deleteAll();
210
211 // dlf - the original code kept a separate description array for no reason,
212 // so I got rid of it. The loop was too complex so I simplified it.
213
214 UnicodeString currentDescription;
215 int32_t oldP = 0;
216 while (oldP < description.length()) {
217 int32_t p = description.indexOf(gSemicolon, oldP);
218 if (p == -1) {
219 p = description.length();
220 }
221 currentDescription.setTo(description, oldP, p - oldP);
222 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
223 oldP = p + 1;
224 }
225
226 // for rules that didn't specify a base value, their base values
227 // were initialized to 0. Make another pass through the list and
228 // set all those rules' base values. We also remove any special
229 // rules from the list and put them into their own member variables
230 int64_t defaultBaseValue = 0;
231
232 // (this isn't a for loop because we might be deleting items from
233 // the vector-- we want to make sure we only increment i when
234 // we _didn't_ delete anything from the vector)
235 int32_t rulesSize = rules.size();
236 for (int32_t i = 0; i < rulesSize; i++) {
237 NFRule* rule = rules[i];
238 int64_t baseValue = rule->getBaseValue();
239
240 if (baseValue == 0) {
241 // if the rule's base value is 0, fill in a default
242 // base value (this will be 1 plus the preceding
243 // rule's base value for regular rule sets, and the
244 // same as the preceding rule's base value in fraction
245 // rule sets)
246 rule->setBaseValue(defaultBaseValue, status);
247 }
248 else {
249 // if it's a regular rule that already knows its base value,
250 // check to make sure the rules are in order, and update
251 // the default base value for the next rule
252 if (baseValue < defaultBaseValue) {
253 // throw new IllegalArgumentException("Rules are not in order");
254 status = U_PARSE_ERROR;
255 return;
256 }
257 defaultBaseValue = baseValue;
258 }
259 if (!fIsFractionRuleSet) {
260 ++defaultBaseValue;
261 }
262 }
263}
264
265/**
266 * Set one of the non-numerical rules.
267 * @param rule The rule to set.
268 */
269void NFRuleSet::setNonNumericalRule(NFRule *rule) {
270 switch (rule->getBaseValue()) {
271 case NFRule::kNegativeNumberRule:
272 delete nonNumericalRules[NEGATIVE_RULE_INDEX];
273 nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
274 return;
275 case NFRule::kImproperFractionRule:
276 setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, true);
277 return;
278 case NFRule::kProperFractionRule:
279 setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, true);
280 return;
281 case NFRule::kDefaultRule:
282 setBestFractionRule(DEFAULT_RULE_INDEX, rule, true);
283 return;
284 case NFRule::kInfinityRule:
285 delete nonNumericalRules[INFINITY_RULE_INDEX];
286 nonNumericalRules[INFINITY_RULE_INDEX] = rule;
287 return;
288 case NFRule::kNaNRule:
289 delete nonNumericalRules[NAN_RULE_INDEX];
290 nonNumericalRules[NAN_RULE_INDEX] = rule;
291 return;
292 case NFRule::kNoBase:
293 case NFRule::kOtherRule:
294 default:
295 // If we do not remember the rule inside the object.
296 // delete it here to prevent memory leak.
297 delete rule;
298 return;
299 }
300}
301
302/**
303 * Determine the best fraction rule to use. Rules matching the decimal point from
304 * DecimalFormatSymbols become the main set of rules to use.
305 * @param originalIndex The index into nonNumericalRules
306 * @param newRule The new rule to consider
307 * @param rememberRule Should the new rule be added to fractionRules.
308 */
309void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
310 if (rememberRule) {
311 fractionRules.add(newRule);
312 }
313 NFRule *bestResult = nonNumericalRules[originalIndex];
314 if (bestResult == nullptr) {
315 nonNumericalRules[originalIndex] = newRule;
316 }
317 else {
318 // We have more than one. Which one is better?
319 const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
320 if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
321 == newRule->getDecimalPoint())
322 {
323 nonNumericalRules[originalIndex] = newRule;
324 }
325 // else leave it alone
326 }
327}
328
329NFRuleSet::~NFRuleSet()
330{
331 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
332 if (i != IMPROPER_FRACTION_RULE_INDEX
333 && i != PROPER_FRACTION_RULE_INDEX
334 && i != DEFAULT_RULE_INDEX)
335 {
336 delete nonNumericalRules[i];
337 }
338 // else it will be deleted via NFRuleList fractionRules
339 }
340}
341
342static UBool
343util_equalRules(const NFRule* rule1, const NFRule* rule2)
344{
345 if (rule1) {
346 if (rule2) {
347 return *rule1 == *rule2;
348 }
349 } else if (!rule2) {
350 return true;
351 }
352 return false;
353}
354
355bool
356NFRuleSet::operator==(const NFRuleSet& rhs) const
357{
358 if (rules.size() == rhs.rules.size() &&
359 fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
360 name == rhs.name) {
361
362 // ...then compare the non-numerical rule lists...
363 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
364 if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
365 return false;
366 }
367 }
368
369 // ...then compare the rule lists...
370 for (uint32_t i = 0; i < rules.size(); ++i) {
371 if (*rules[i] != *rhs.rules[i]) {
372 return false;
373 }
374 }
375 return true;
376 }
377 return false;
378}
379
380void
381NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
382 for (uint32_t i = 0; i < rules.size(); ++i) {
383 rules[i]->setDecimalFormatSymbols(newSymbols, status);
384 }
385 // Switch the fraction rules to mirror the DecimalFormatSymbols.
386 for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) {
387 if (nonNumericalRules[nonNumericalIdx]) {
388 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
389 NFRule *fractionRule = fractionRules[fIdx];
390 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
391 setBestFractionRule(nonNumericalIdx, fractionRule, false);
392 }
393 }
394 }
395 }
396
397 for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
398 NFRule *rule = nonNumericalRules[nnrIdx];
399 if (rule) {
400 rule->setDecimalFormatSymbols(newSymbols, status);
401 }
402 }
403}
404
405#define RECURSION_LIMIT64 64
406
407void
408NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
409{
410 if (recursionCount >= RECURSION_LIMIT64) {
411 // stop recursion
412 status = U_INVALID_STATE_ERROR;
413 return;
414 }
415 const NFRule *rule = findNormalRule(number);
416 if (rule) { // else error, but can't report it
417 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
418 }
419}
420
421void
422NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
423{
424 if (recursionCount >= RECURSION_LIMIT64) {
1
Assuming 'recursionCount' is < RECURSION_LIMIT
2
Taking false branch
425 // stop recursion
426 status = U_INVALID_STATE_ERROR;
427 return;
428 }
429 const NFRule *rule = findDoubleRule(number);
3
Calling 'NFRuleSet::findDoubleRule'
430 if (rule) { // else error, but can't report it
431 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
432 }
433}
434
435const NFRule*
436NFRuleSet::findDoubleRule(double number) const
437{
438 // if this is a fraction rule set, use findFractionRuleSetRule()
439 if (isFractionRuleSet()) {
4
Assuming the condition is true
5
Taking true branch
440 return findFractionRuleSetRule(number);
6
Calling 'NFRuleSet::findFractionRuleSetRule'
441 }
442
443 if (uprv_isNaNuprv_isNaN_77(number)) {
444 const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
445 if (!rule) {
446 rule = owner->getDefaultNaNRule();
447 }
448 return rule;
449 }
450
451 // if the number is negative, return the negative number rule
452 // (if there isn't a negative-number rule, we pretend it's a
453 // positive number)
454 if (number < 0) {
455 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
456 return nonNumericalRules[NEGATIVE_RULE_INDEX];
457 } else {
458 number = -number;
459 }
460 }
461
462 if (uprv_isInfiniteuprv_isInfinite_77(number)) {
463 const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
464 if (!rule) {
465 rule = owner->getDefaultInfinityRule();
466 }
467 return rule;
468 }
469
470 // if the number isn't an integer, we use one of the fraction rules...
471 if (number != uprv_flooruprv_floor_77(number)) {
472 // if the number is between 0 and 1, return the proper
473 // fraction rule
474 if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
475 return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
476 }
477 // otherwise, return the improper fraction rule
478 else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
479 return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
480 }
481 }
482
483 // if there's a default rule, use it to format the number
484 if (nonNumericalRules[DEFAULT_RULE_INDEX]) {
485 return nonNumericalRules[DEFAULT_RULE_INDEX];
486 }
487
488 // and if we haven't yet returned a rule, use findNormalRule()
489 // to find the applicable rule
490 int64_t r = util64_fromDouble(number + 0.5);
491 return findNormalRule(r);
492}
493
494const NFRule *
495NFRuleSet::findNormalRule(int64_t number) const
496{
497 // if this is a fraction rule set, use findFractionRuleSetRule()
498 // to find the rule (we should only go into this clause if the
499 // value is 0)
500 if (fIsFractionRuleSet) {
501 return findFractionRuleSetRule(static_cast<double>(number));
502 }
503
504 // if the number is negative, return the negative-number rule
505 // (if there isn't one, pretend the number is positive)
506 if (number < 0) {
507 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
508 return nonNumericalRules[NEGATIVE_RULE_INDEX];
509 } else {
510 number = -number;
511 }
512 }
513
514 // we have to repeat the preceding two checks, even though we
515 // do them in findRule(), because the version of format() that
516 // takes a long bypasses findRule() and goes straight to this
517 // function. This function does skip the fraction rules since
518 // we know the value is an integer (it also skips the default
519 // rule, since it's considered a fraction rule. Skipping the
520 // default rule in this function is also how we avoid infinite
521 // recursion)
522
523 // {dlf} unfortunately this fails if there are no rules except
524 // special rules. If there are no rules, use the default rule.
525
526 // binary-search the rule list for the applicable rule
527 // (a rule is used for all values from its base value to
528 // the next rule's base value)
529 int32_t hi = rules.size();
530 if (hi > 0) {
531 int32_t lo = 0;
532
533 while (lo < hi) {
534 int32_t mid = (lo + hi) / 2;
535 if (rules[mid]->getBaseValue() == number) {
536 return rules[mid];
537 }
538 else if (rules[mid]->getBaseValue() > number) {
539 hi = mid;
540 }
541 else {
542 lo = mid + 1;
543 }
544 }
545 if (hi == 0) { // bad rule set, minimum base > 0
546 return nullptr; // want to throw exception here
547 }
548
549 NFRule *result = rules[hi - 1];
550
551 // use shouldRollBack() to see whether we need to invoke the
552 // rollback rule (see shouldRollBack()'s documentation for
553 // an explanation of the rollback rule). If we do, roll back
554 // one rule and return that one instead of the one we'd normally
555 // return
556 if (result->shouldRollBack(number)) {
557 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
558 return nullptr;
559 }
560 result = rules[hi - 2];
561 }
562 return result;
563 }
564 // else use the default rule
565 return nonNumericalRules[DEFAULT_RULE_INDEX];
566}
567
568/**
569 * If this rule is a fraction rule set, this function is used by
570 * findRule() to select the most appropriate rule for formatting
571 * the number. Basically, the base value of each rule in the rule
572 * set is treated as the denominator of a fraction. Whichever
573 * denominator can produce the fraction closest in value to the
574 * number passed in is the result. If there's a tie, the earlier
575 * one in the list wins. (If there are two rules in a row with the
576 * same base value, the first one is used when the numerator of the
577 * fraction would be 1, and the second rule is used the rest of the
578 * time.
579 * @param number The number being formatted (which will always be
580 * a number between 0 and 1)
581 * @return The rule to use to format this number
582 */
583const NFRule*
584NFRuleSet::findFractionRuleSetRule(double number) const
585{
586 // the obvious way to do this (multiply the value being formatted
587 // by each rule's base value until you get an integral result)
588 // doesn't work because of rounding error. This method is more
589 // accurate
590
591 // find the least common multiple of the rules' base values
592 // and multiply this by the number being formatted. This is
593 // all the precision we need, and we can do all of the rest
594 // of the math using integer arithmetic
595 int64_t leastCommonMultiple = rules[0]->getBaseValue();
596 int64_t numerator;
597 {
598 for (uint32_t i = 1; i < rules.size(); ++i) {
7
Assuming the condition is true
8
Loop condition is true. Entering loop body
599 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
9
Calling 'util_lcm'
600 }
601 numerator = util64_fromDouble(number * static_cast<double>(leastCommonMultiple) + 0.5);
602 }
603 // for each rule, do the following...
604 int64_t tempDifference;
605 int64_t difference = util64_fromDouble(uprv_maxMantissauprv_maxMantissa_77());
606 int32_t winner = 0;
607 for (uint32_t i = 0; i < rules.size(); ++i) {
608 // "numerator" is the numerator of the fraction if the
609 // denominator is the LCD. The numerator if the rule's
610 // base value is the denominator is "numerator" times the
611 // base value divided bythe LCD. Here we check to see if
612 // that's an integer, and if not, how close it is to being
613 // an integer.
614 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
615
616
617 // normalize the result of the above calculation: we want
618 // the numerator's distance from the CLOSEST multiple
619 // of the LCD
620 if (leastCommonMultiple - tempDifference < tempDifference) {
621 tempDifference = leastCommonMultiple - tempDifference;
622 }
623
624 // if this is as close as we've come, keep track of how close
625 // that is, and the line number of the rule that did it. If
626 // we've scored a direct hit, we don't have to look at any more
627 // rules
628 if (tempDifference < difference) {
629 difference = tempDifference;
630 winner = i;
631 if (difference == 0) {
632 break;
633 }
634 }
635 }
636
637 // if we have two successive rules that both have the winning base
638 // value, then the first one (the one we found above) is used if
639 // the numerator of the fraction is 1 and the second one is used if
640 // the numerator of the fraction is anything else (this lets us
641 // do things like "one third"/"two thirds" without having to define
642 // a whole bunch of extra rule sets)
643 if (static_cast<unsigned>(winner + 1) < rules.size() &&
644 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
645 double n = static_cast<double>(rules[winner]->getBaseValue()) * number;
646 if (n < 0.5 || n >= 2) {
647 ++winner;
648 }
649 }
650
651 // finally, return the winning rule
652 return rules[winner];
653}
654
655/**
656 * Parses a string. Matches the string to be parsed against each
657 * of its rules (with a base value less than upperBound) and returns
658 * the value produced by the rule that matched the most characters
659 * in the source string.
660 * @param text The string to parse
661 * @param parsePosition The initial position is ignored and assumed
662 * to be 0. On exit, this object has been updated to point to the
663 * first character position this rule set didn't consume.
664 * @param upperBound Limits the rules that can be allowed to match.
665 * Only rules whose base values are strictly less than upperBound
666 * are considered.
667 * @return The numerical result of parsing this string. This will
668 * be the matching rule's base value, composed appropriately with
669 * the results of matching any of its substitutions. The object
670 * will be an instance of Long if it's an integral value; otherwise,
671 * it will be an instance of Double. This function always returns
672 * a valid object: If nothing matched the input string at all,
673 * this function returns new Long(0), and the parse position is
674 * left unchanged.
675 */
676#ifdef RBNF_DEBUG
677#include <stdio.h>
678
679static void dumpUS(FILE* f, const UnicodeString& us) {
680 int len = us.length();
681 char* buf = (char *)uprv_mallocuprv_malloc_77((len+1)*sizeof(char)); //new char[len+1];
682 if (buf != nullptr) {
683 us.extract(0, len, buf);
684 buf[len] = 0;
685 fprintf(f, "%s", buf);
686 uprv_freeuprv_free_77(buf); //delete[] buf;
687 }
688}
689#endif
690
691UBool
692NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const
693{
694 // try matching each rule in the rule set against the text being
695 // parsed. Whichever one matches the most characters is the one
696 // that determines the value we return.
697
698 result.setLong(0);
699
700 // dump out if we've reached the recursion limit
701 if (recursionCount >= RECURSION_LIMIT64) {
702 // stop recursion
703 return false;
704 }
705
706 // dump out if there's no text to parse
707 if (text.length() == 0) {
708 return 0;
709 }
710
711 ParsePosition highWaterMark;
712 ParsePosition workingPos = pos;
713
714#ifdef RBNF_DEBUG
715 fprintf(stderrstderr, "<nfrs> %x '", this);
716 dumpUS(stderrstderr, name);
717 fprintf(stderrstderr, "' text '");
718 dumpUS(stderrstderr, text);
719 fprintf(stderrstderr, "'\n");
720 fprintf(stderrstderr, " parse negative: %d\n", this, negativeNumberRule != 0);
721#endif
722 // Try each of the negative rules, fraction rules, infinity rules and NaN rules
723 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
724 if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
725 // Mark this rule as being executed so that we don't try to execute it again.
726 nonNumericalExecutedRuleMask |= 1 << i;
727
728 Formattable tempResult;
729 UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
730 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
731 result = tempResult;
732 highWaterMark = workingPos;
733 }
734 workingPos = pos;
735 }
736 }
737#ifdef RBNF_DEBUG
738 fprintf(stderrstderr, "<nfrs> continue other with text '");
739 dumpUS(stderrstderr, text);
740 fprintf(stderrstderr, "' hwm: %d\n", highWaterMark.getIndex());
741#endif
742
743 // finally, go through the regular rules one at a time. We start
744 // at the end of the list because we want to try matching the most
745 // sigificant rule first (this helps ensure that we parse
746 // "five thousand three hundred six" as
747 // "(five thousand) (three hundred) (six)" rather than
748 // "((five thousand three) hundred) (six)"). Skip rules whose
749 // base values are higher than the upper bound (again, this helps
750 // limit ambiguity by making sure the rules that match a rule's
751 // are less significant than the rule containing the substitutions)/
752 {
753 int64_t ub = util64_fromDouble(upperBound);
754#ifdef RBNF_DEBUG
755 {
756 char ubstr[64];
757 util64_toa(ub, ubstr, 64);
758 char ubstrhex[64];
759 util64_toa(ub, ubstrhex, 64, 16);
760 fprintf(stderrstderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
761 }
762#endif
763 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
764 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
765 continue;
766 }
767 Formattable tempResult;
768 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
769 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
770 result = tempResult;
771 highWaterMark = workingPos;
772 }
773 workingPos = pos;
774 }
775 }
776#ifdef RBNF_DEBUG
777 fprintf(stderrstderr, "<nfrs> exit\n");
778#endif
779 // finally, update the parse position we were passed to point to the
780 // first character we didn't use, and return the result that
781 // corresponds to that string of characters
782 pos = highWaterMark;
783
784 return 1;
785}
786
787void
788NFRuleSet::appendRules(UnicodeString& result) const
789{
790 uint32_t i;
791
792 // the rule set name goes first...
793 result.append(name);
794 result.append(gColon);
795 result.append(gLineFeed);
796
797 // followed by the regular rules...
798 for (i = 0; i < rules.size(); i++) {
799 rules[i]->_appendRuleText(result);
800 result.append(gLineFeed);
801 }
802
803 // followed by the special rules (if they exist)
804 for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
805 NFRule *rule = nonNumericalRules[i];
806 if (nonNumericalRules[i]) {
807 if (rule->getBaseValue() == NFRule::kImproperFractionRule
808 || rule->getBaseValue() == NFRule::kProperFractionRule
809 || rule->getBaseValue() == NFRule::kDefaultRule)
810 {
811 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
812 NFRule *fractionRule = fractionRules[fIdx];
813 if (fractionRule->getBaseValue() == rule->getBaseValue()) {
814 fractionRule->_appendRuleText(result);
815 result.append(gLineFeed);
816 }
817 }
818 }
819 else {
820 rule->_appendRuleText(result);
821 result.append(gLineFeed);
822 }
823 }
824 }
825}
826
827// utility functions
828
829int64_t util64_fromDouble(double d) {
830 int64_t result = 0;
831 if (!uprv_isNaNuprv_isNaN_77(d)) {
832 double mant = uprv_maxMantissauprv_maxMantissa_77();
833 if (d < -mant) {
834 d = -mant;
835 } else if (d > mant) {
836 d = mant;
837 }
838 UBool neg = d < 0;
839 if (neg) {
840 d = -d;
841 }
842 result = static_cast<int64_t>(uprv_flooruprv_floor_77(d));
843 if (neg) {
844 result = -result;
845 }
846 }
847 return result;
848}
849
850uint64_t util64_pow(uint32_t base, uint16_t exponent) {
851 if (base == 0) {
852 return 0;
853 }
854 uint64_t result = 1;
855 uint64_t pow = base;
856 while (true) {
857 if ((exponent & 1) == 1) {
858 result *= pow;
859 }
860 exponent >>= 1;
861 if (exponent == 0) {
862 break;
863 }
864 pow *= pow;
865 }
866 return result;
867}
868
869static const uint8_t asciiDigits[] = {
870 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
871 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
872 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
873 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
874 0x77u, 0x78u, 0x79u, 0x7au,
875};
876
877static const char16_t kUMinus = static_cast<char16_t>(0x002d);
878
879#ifdef RBNF_DEBUG
880static const char kMinus = '-';
881
882static const uint8_t digitInfo[] = {
883 0, 0, 0, 0, 0, 0, 0, 0,
884 0, 0, 0, 0, 0, 0, 0, 0,
885 0, 0, 0, 0, 0, 0, 0, 0,
886 0, 0, 0, 0, 0, 0, 0, 0,
887 0, 0, 0, 0, 0, 0, 0, 0,
888 0, 0, 0, 0, 0, 0, 0, 0,
889 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
890 0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
891 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
892 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
893 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
894 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
895 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
896 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
897 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
898 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
899};
900
901int64_t util64_atoi(const char* str, uint32_t radix)
902{
903 if (radix > 36) {
904 radix = 36;
905 } else if (radix < 2) {
906 radix = 2;
907 }
908 int64_t lradix = radix;
909
910 int neg = 0;
911 if (*str == kMinus) {
912 ++str;
913 neg = 1;
914 }
915 int64_t result = 0;
916 uint8_t b;
917 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
918 result *= lradix;
919 result += (int32_t)b;
920 }
921 if (neg) {
922 result = -result;
923 }
924 return result;
925}
926
927int64_t util64_utoi(const char16_t* str, uint32_t radix)
928{
929 if (radix > 36) {
930 radix = 36;
931 } else if (radix < 2) {
932 radix = 2;
933 }
934 int64_t lradix = radix;
935
936 int neg = 0;
937 if (*str == kUMinus) {
938 ++str;
939 neg = 1;
940 }
941 int64_t result = 0;
942 char16_t c;
943 uint8_t b;
944 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
945 result *= lradix;
946 result += (int32_t)b;
947 }
948 if (neg) {
949 result = -result;
950 }
951 return result;
952}
953
954uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
955{
956 if (radix > 36) {
957 radix = 36;
958 } else if (radix < 2) {
959 radix = 2;
960 }
961 int64_t base = radix;
962
963 char* p = buf;
964 if (len && (w < 0) && (radix == 10) && !raw) {
965 w = -w;
966 *p++ = kMinus;
967 --len;
968 } else if (len && (w == 0)) {
969 *p++ = (char)raw ? 0 : asciiDigits[0];
970 --len;
971 }
972
973 while (len && w != 0) {
974 int64_t n = w / base;
975 int64_t m = n * base;
976 int32_t d = (int32_t)(w-m);
977 *p++ = raw ? (char)d : asciiDigits[d];
978 w = n;
979 --len;
980 }
981 if (len) {
982 *p = 0; // null terminate if room for caller convenience
983 }
984
985 len = p - buf;
986 if (*buf == kMinus) {
987 ++buf;
988 }
989 while (--p > buf) {
990 char c = *p;
991 *p = *buf;
992 *buf = c;
993 ++buf;
994 }
995
996 return len;
997}
998#endif
999
1000uint32_t util64_tou(int64_t w, char16_t* buf, uint32_t len, uint32_t radix, UBool raw)
1001{
1002 if (radix > 36) {
1003 radix = 36;
1004 } else if (radix < 2) {
1005 radix = 2;
1006 }
1007 int64_t base = radix;
1008
1009 char16_t* p = buf;
1010 if (len && (w < 0) && (radix == 10) && !raw) {
1011 w = -w;
1012 *p++ = kUMinus;
1013 --len;
1014 } else if (len && (w == 0)) {
1015 *p++ = static_cast<char16_t>(raw) ? 0 : asciiDigits[0];
1016 --len;
1017 }
1018
1019 while (len && (w != 0)) {
1020 int64_t n = w / base;
1021 int64_t m = n * base;
1022 int32_t d = static_cast<int32_t>(w - m);
1023 *p++ = static_cast<char16_t>(raw ? d : asciiDigits[d]);
1024 w = n;
1025 --len;
1026 }
1027 if (len) {
1028 *p = 0; // null terminate if room for caller convenience
1029 }
1030
1031 len = static_cast<uint32_t>(p - buf);
1032 if (*buf == kUMinus) {
1033 ++buf;
1034 }
1035 while (--p > buf) {
1036 char16_t c = *p;
1037 *p = *buf;
1038 *buf = c;
1039 ++buf;
1040 }
1041
1042 return len;
1043}
1044
1045
1046U_NAMESPACE_END}
1047
1048/* U_HAVE_RBNF */
1049#endif