| File: | root/firefox-clang/intl/icu/source/i18n/collationweights.cpp |
| Warning: | line 85, column 55 The result of left shift is undefined because the right operand '32' is not smaller than 32, the capacity of 'unsigned int' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | // © 2016 and later: Unicode, Inc. and others. | |||
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | |||
| 3 | /* | |||
| 4 | ******************************************************************************* | |||
| 5 | * | |||
| 6 | * Copyright (C) 1999-2015, International Business Machines | |||
| 7 | * Corporation and others. All Rights Reserved. | |||
| 8 | * | |||
| 9 | ******************************************************************************* | |||
| 10 | * file name: collationweights.cpp | |||
| 11 | * encoding: UTF-8 | |||
| 12 | * tab size: 8 (not used) | |||
| 13 | * indentation:4 | |||
| 14 | * | |||
| 15 | * created on: 2001mar08 as ucol_wgt.cpp | |||
| 16 | * created by: Markus W. Scherer | |||
| 17 | * | |||
| 18 | * This file contains code for allocating n collation element weights | |||
| 19 | * between two exclusive limits. | |||
| 20 | * It is used only internally by the collation tailoring builder. | |||
| 21 | */ | |||
| 22 | ||||
| 23 | #include "unicode/utypes.h" | |||
| 24 | ||||
| 25 | #if !UCONFIG_NO_COLLATION0 | |||
| 26 | ||||
| 27 | #include "cmemory.h" | |||
| 28 | #include "collation.h" | |||
| 29 | #include "collationweights.h" | |||
| 30 | #include "uarrsort.h" | |||
| 31 | #include "uassert.h" | |||
| 32 | ||||
| 33 | #ifdef UCOL_DEBUG | |||
| 34 | # include <stdio.h> | |||
| 35 | #endif | |||
| 36 | ||||
| 37 | U_NAMESPACE_BEGINnamespace icu_77 { | |||
| 38 | ||||
| 39 | /* collation element weight allocation -------------------------------------- */ | |||
| 40 | ||||
| 41 | /* helper functions for CE weights */ | |||
| 42 | ||||
| 43 | static inline uint32_t | |||
| 44 | getWeightTrail(uint32_t weight, int32_t length) { | |||
| 45 | return (weight >> (8 * (4 - length))) & 0xff; | |||
| 46 | } | |||
| 47 | ||||
| 48 | static inline uint32_t | |||
| 49 | setWeightTrail(uint32_t weight, int32_t length, uint32_t trail) { | |||
| 50 | length=8*(4-length); | |||
| 51 | return static_cast<uint32_t>((weight & (0xffffff00 << length)) | (trail << length)); | |||
| 52 | } | |||
| 53 | ||||
| 54 | static inline uint32_t | |||
| 55 | getWeightByte(uint32_t weight, int32_t idx) { | |||
| 56 | return getWeightTrail(weight, idx); /* same calculation */ | |||
| 57 | } | |||
| 58 | ||||
| 59 | static inline uint32_t | |||
| 60 | setWeightByte(uint32_t weight, int32_t idx, uint32_t byte) { | |||
| 61 | uint32_t mask; /* 0xffffffff except a 00 "hole" for the index-th byte */ | |||
| 62 | ||||
| 63 | idx*=8; | |||
| 64 | if(idx<32) { | |||
| 65 | mask = (static_cast<uint32_t>(0xffffffff)) >> idx; | |||
| 66 | } else { | |||
| 67 | // Do not use uint32_t>>32 because on some platforms that does not shift at all | |||
| 68 | // while we need it to become 0. | |||
| 69 | // PowerPC: 0xffffffff>>32 = 0 (wanted) | |||
| 70 | // x86: 0xffffffff>>32 = 0xffffffff (not wanted) | |||
| 71 | // | |||
| 72 | // ANSI C99 6.5.7 Bitwise shift operators: | |||
| 73 | // "If the value of the right operand is negative | |||
| 74 | // or is greater than or equal to the width of the promoted left operand, | |||
| 75 | // the behavior is undefined." | |||
| 76 | mask=0; | |||
| 77 | } | |||
| 78 | idx=32-idx; | |||
| 79 | mask|=0xffffff00<<idx; | |||
| 80 | return ((weight & mask) | (byte << idx)); | |||
| 81 | } | |||
| 82 | ||||
| 83 | static inline uint32_t | |||
| 84 | truncateWeight(uint32_t weight, int32_t length) { | |||
| 85 | return static_cast<uint32_t>(weight & (0xffffffff << (8 * (4 - length)))); | |||
| ||||
| 86 | } | |||
| 87 | ||||
| 88 | static inline uint32_t | |||
| 89 | incWeightTrail(uint32_t weight, int32_t length) { | |||
| 90 | return static_cast<uint32_t>(weight + (1UL << (8 * (4 - length)))); | |||
| 91 | } | |||
| 92 | ||||
| 93 | static inline uint32_t | |||
| 94 | decWeightTrail(uint32_t weight, int32_t length) { | |||
| 95 | return static_cast<uint32_t>(weight - (1UL << (8 * (4 - length)))); | |||
| 96 | } | |||
| 97 | ||||
| 98 | CollationWeights::CollationWeights() | |||
| 99 | : middleLength(0), rangeIndex(0), rangeCount(0) { | |||
| 100 | for(int32_t i = 0; i < 5; ++i) { | |||
| 101 | minBytes[i] = maxBytes[i] = 0; | |||
| 102 | } | |||
| 103 | } | |||
| 104 | ||||
| 105 | void | |||
| 106 | CollationWeights::initForPrimary(UBool compressible) { | |||
| 107 | middleLength=1; | |||
| 108 | minBytes[1] = Collation::MERGE_SEPARATOR_BYTE + 1; | |||
| 109 | maxBytes[1] = Collation::TRAIL_WEIGHT_BYTE; | |||
| 110 | if(compressible) { | |||
| 111 | minBytes[2] = Collation::PRIMARY_COMPRESSION_LOW_BYTE + 1; | |||
| 112 | maxBytes[2] = Collation::PRIMARY_COMPRESSION_HIGH_BYTE - 1; | |||
| 113 | } else { | |||
| 114 | minBytes[2] = 2; | |||
| 115 | maxBytes[2] = 0xff; | |||
| 116 | } | |||
| 117 | minBytes[3] = 2; | |||
| 118 | maxBytes[3] = 0xff; | |||
| 119 | minBytes[4] = 2; | |||
| 120 | maxBytes[4] = 0xff; | |||
| 121 | } | |||
| 122 | ||||
| 123 | void | |||
| 124 | CollationWeights::initForSecondary() { | |||
| 125 | // We use only the lower 16 bits for secondary weights. | |||
| 126 | middleLength=3; | |||
| 127 | minBytes[1] = 0; | |||
| 128 | maxBytes[1] = 0; | |||
| 129 | minBytes[2] = 0; | |||
| 130 | maxBytes[2] = 0; | |||
| 131 | minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1; | |||
| 132 | maxBytes[3] = 0xff; | |||
| 133 | minBytes[4] = 2; | |||
| 134 | maxBytes[4] = 0xff; | |||
| 135 | } | |||
| 136 | ||||
| 137 | void | |||
| 138 | CollationWeights::initForTertiary() { | |||
| 139 | // We use only the lower 16 bits for tertiary weights. | |||
| 140 | middleLength=3; | |||
| 141 | minBytes[1] = 0; | |||
| 142 | maxBytes[1] = 0; | |||
| 143 | minBytes[2] = 0; | |||
| 144 | maxBytes[2] = 0; | |||
| 145 | // We use only 6 bits per byte. | |||
| 146 | // The other bits are used for case & quaternary weights. | |||
| 147 | minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1; | |||
| 148 | maxBytes[3] = 0x3f; | |||
| 149 | minBytes[4] = 2; | |||
| 150 | maxBytes[4] = 0x3f; | |||
| 151 | } | |||
| 152 | ||||
| 153 | uint32_t | |||
| 154 | CollationWeights::incWeight(uint32_t weight, int32_t length) const { | |||
| 155 | for(;;) { | |||
| 156 | uint32_t byte=getWeightByte(weight, length); | |||
| 157 | if(byte<maxBytes[length]) { | |||
| 158 | return setWeightByte(weight, length, byte+1); | |||
| 159 | } else { | |||
| 160 | // Roll over, set this byte to the minimum and increment the previous one. | |||
| 161 | weight=setWeightByte(weight, length, minBytes[length]); | |||
| 162 | --length; | |||
| 163 | U_ASSERT(length > 0)(static_cast <bool> (length > 0) ? void (0) : __assert_fail ("length > 0", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
| 164 | } | |||
| 165 | } | |||
| 166 | } | |||
| 167 | ||||
| 168 | uint32_t | |||
| 169 | CollationWeights::incWeightByOffset(uint32_t weight, int32_t length, int32_t offset) const { | |||
| 170 | for(;;) { | |||
| 171 | offset += getWeightByte(weight, length); | |||
| 172 | if (static_cast<uint32_t>(offset) <= maxBytes[length]) { | |||
| 173 | return setWeightByte(weight, length, offset); | |||
| 174 | } else { | |||
| 175 | // Split the offset between this byte and the previous one. | |||
| 176 | offset -= minBytes[length]; | |||
| 177 | weight = setWeightByte(weight, length, minBytes[length] + offset % countBytes(length)); | |||
| 178 | offset /= countBytes(length); | |||
| 179 | --length; | |||
| 180 | U_ASSERT(length > 0)(static_cast <bool> (length > 0) ? void (0) : __assert_fail ("length > 0", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
| 181 | } | |||
| 182 | } | |||
| 183 | } | |||
| 184 | ||||
| 185 | void | |||
| 186 | CollationWeights::lengthenRange(WeightRange &range) const { | |||
| 187 | int32_t length=range.length+1; | |||
| 188 | range.start=setWeightTrail(range.start, length, minBytes[length]); | |||
| 189 | range.end=setWeightTrail(range.end, length, maxBytes[length]); | |||
| 190 | range.count*=countBytes(length); | |||
| 191 | range.length=length; | |||
| 192 | } | |||
| 193 | ||||
| 194 | /* for uprv_sortArray: sort ranges in weight order */ | |||
| 195 | static int32_t U_CALLCONV | |||
| 196 | compareRanges(const void * /*context*/, const void *left, const void *right) { | |||
| 197 | uint32_t l, r; | |||
| 198 | ||||
| 199 | l = static_cast<const CollationWeights::WeightRange*>(left)->start; | |||
| 200 | r = static_cast<const CollationWeights::WeightRange*>(right)->start; | |||
| 201 | if(l<r) { | |||
| 202 | return -1; | |||
| 203 | } else if(l>r) { | |||
| 204 | return 1; | |||
| 205 | } else { | |||
| 206 | return 0; | |||
| 207 | } | |||
| 208 | } | |||
| 209 | ||||
| 210 | UBool | |||
| 211 | CollationWeights::getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit) { | |||
| 212 | U_ASSERT(lowerLimit != 0)(static_cast <bool> (lowerLimit != 0) ? void (0) : __assert_fail ("lowerLimit != 0", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
| 213 | U_ASSERT(upperLimit != 0)(static_cast <bool> (upperLimit != 0) ? void (0) : __assert_fail ("upperLimit != 0", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
| 214 | ||||
| 215 | /* get the lengths of the limits */ | |||
| 216 | int32_t lowerLength=lengthOfWeight(lowerLimit); | |||
| 217 | int32_t upperLength=lengthOfWeight(upperLimit); | |||
| 218 | ||||
| 219 | #ifdef UCOL_DEBUG | |||
| 220 | printf("length of lower limit 0x%08lx is %ld\n", lowerLimit, lowerLength); | |||
| 221 | printf("length of upper limit 0x%08lx is %ld\n", upperLimit, upperLength); | |||
| 222 | #endif | |||
| 223 | U_ASSERT(lowerLength>=middleLength)(static_cast <bool> (lowerLength>=middleLength) ? void (0) : __assert_fail ("lowerLength>=middleLength", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
| 224 | // Permit upperLength<middleLength: The upper limit for secondaries is 0x10000. | |||
| 225 | ||||
| 226 | if(lowerLimit>=upperLimit) { | |||
| 227 | #ifdef UCOL_DEBUG | |||
| 228 | printf("error: no space between lower & upper limits\n"); | |||
| 229 | #endif | |||
| 230 | return false; | |||
| 231 | } | |||
| 232 | ||||
| 233 | /* check that neither is a prefix of the other */ | |||
| 234 | if(lowerLength
| |||
| 235 | if(lowerLimit==truncateWeight(upperLimit, lowerLength)) { | |||
| 236 | #ifdef UCOL_DEBUG | |||
| 237 | printf("error: lower limit 0x%08lx is a prefix of upper limit 0x%08lx\n", lowerLimit, upperLimit); | |||
| 238 | #endif | |||
| 239 | return false; | |||
| 240 | } | |||
| 241 | } | |||
| 242 | /* if the upper limit is a prefix of the lower limit then the earlier test lowerLimit>=upperLimit has caught it */ | |||
| 243 | ||||
| 244 | WeightRange lower[5], middle, upper[5]; /* [0] and [1] are not used - this simplifies indexing */ | |||
| 245 | uprv_memset(lower, 0, sizeof(lower)):: memset(lower, 0, sizeof(lower)); | |||
| 246 | uprv_memset(&middle, 0, sizeof(middle)):: memset(&middle, 0, sizeof(middle)); | |||
| 247 | uprv_memset(upper, 0, sizeof(upper)):: memset(upper, 0, sizeof(upper)); | |||
| 248 | ||||
| 249 | /* | |||
| 250 | * With the limit lengths of 1..4, there are up to 7 ranges for allocation: | |||
| 251 | * range minimum length | |||
| 252 | * lower[4] 4 | |||
| 253 | * lower[3] 3 | |||
| 254 | * lower[2] 2 | |||
| 255 | * middle 1 | |||
| 256 | * upper[2] 2 | |||
| 257 | * upper[3] 3 | |||
| 258 | * upper[4] 4 | |||
| 259 | * | |||
| 260 | * We are now going to calculate up to 7 ranges. | |||
| 261 | * Some of them will typically overlap, so we will then have to merge and eliminate ranges. | |||
| 262 | */ | |||
| 263 | uint32_t weight=lowerLimit; | |||
| 264 | for(int32_t length=lowerLength; length>middleLength; --length) { | |||
| 265 | uint32_t trail=getWeightTrail(weight, length); | |||
| 266 | if(trail<maxBytes[length]) { | |||
| 267 | lower[length].start=incWeightTrail(weight, length); | |||
| 268 | lower[length].end=setWeightTrail(weight, length, maxBytes[length]); | |||
| 269 | lower[length].length=length; | |||
| 270 | lower[length].count=maxBytes[length]-trail; | |||
| 271 | } | |||
| 272 | weight=truncateWeight(weight, length-1); | |||
| 273 | } | |||
| 274 | if(weight<0xff000000) { | |||
| 275 | middle.start=incWeightTrail(weight, middleLength); | |||
| 276 | } else { | |||
| 277 | // Prevent overflow for primary lead byte FF | |||
| 278 | // which would yield a middle range starting at 0. | |||
| 279 | middle.start=0xffffffff; // no middle range | |||
| 280 | } | |||
| 281 | ||||
| 282 | weight=upperLimit; | |||
| 283 | for(int32_t length=upperLength; length>middleLength; --length) { | |||
| 284 | uint32_t trail=getWeightTrail(weight, length); | |||
| 285 | if(trail>minBytes[length]) { | |||
| 286 | upper[length].start=setWeightTrail(weight, length, minBytes[length]); | |||
| 287 | upper[length].end=decWeightTrail(weight, length); | |||
| 288 | upper[length].length=length; | |||
| 289 | upper[length].count=trail-minBytes[length]; | |||
| 290 | } | |||
| 291 | weight=truncateWeight(weight, length-1); | |||
| 292 | } | |||
| 293 | middle.end=decWeightTrail(weight, middleLength); | |||
| 294 | ||||
| 295 | /* set the middle range */ | |||
| 296 | middle.length=middleLength; | |||
| 297 | if(middle.end>=middle.start) { | |||
| 298 | middle.count = static_cast<int32_t>((middle.end - middle.start) >> (8 * (4 - middleLength))) + 1; | |||
| 299 | } else { | |||
| 300 | /* no middle range, eliminate overlaps */ | |||
| 301 | for(int32_t length=4; length>middleLength; --length) { | |||
| 302 | if(lower[length].count>0 && upper[length].count>0) { | |||
| 303 | // Note: The lowerEnd and upperStart weights are versions of | |||
| 304 | // lowerLimit and upperLimit (which are lowerLimit<upperLimit), | |||
| 305 | // truncated (still less-or-equal) | |||
| 306 | // and then with their last bytes changed to the | |||
| 307 | // maxByte (for lowerEnd) or minByte (for upperStart). | |||
| 308 | const uint32_t lowerEnd=lower[length].end; | |||
| 309 | const uint32_t upperStart=upper[length].start; | |||
| 310 | UBool merged=false; | |||
| 311 | ||||
| 312 | if(lowerEnd>upperStart) { | |||
| 313 | // These two lower and upper ranges collide. | |||
| 314 | // Since lowerLimit<upperLimit and lowerEnd and upperStart | |||
| 315 | // are versions with only their last bytes modified | |||
| 316 | // (and following ones removed/reset to 0), | |||
| 317 | // lowerEnd>upperStart is only possible | |||
| 318 | // if the leading bytes are equal | |||
| 319 | // and lastByte(lowerEnd)>lastByte(upperStart). | |||
| 320 | U_ASSERT(truncateWeight(lowerEnd, length-1)==(static_cast <bool> (truncateWeight(lowerEnd, length-1) == truncateWeight(upperStart, length-1)) ? void (0) : __assert_fail ("truncateWeight(lowerEnd, length-1)== truncateWeight(upperStart, length-1)" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )) | |||
| 321 | truncateWeight(upperStart, length-1))(static_cast <bool> (truncateWeight(lowerEnd, length-1) == truncateWeight(upperStart, length-1)) ? void (0) : __assert_fail ("truncateWeight(lowerEnd, length-1)== truncateWeight(upperStart, length-1)" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
| 322 | // Intersect these two ranges. | |||
| 323 | lower[length].end=upper[length].end; | |||
| 324 | lower[length].count= | |||
| 325 | static_cast<int32_t>(getWeightTrail(lower[length].end, length)) - | |||
| 326 | static_cast<int32_t>(getWeightTrail(lower[length].start, length)) + 1; | |||
| 327 | // count might be <=0 in which case there is no room, | |||
| 328 | // and the range-collecting code below will ignore this range. | |||
| 329 | merged=true; | |||
| 330 | } else if(lowerEnd==upperStart) { | |||
| 331 | // Not possible, unless minByte==maxByte which is not allowed. | |||
| 332 | U_ASSERT(minBytes[length]<maxBytes[length])(static_cast <bool> (minBytes[length]<maxBytes[length ]) ? void (0) : __assert_fail ("minBytes[length]<maxBytes[length]" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
| 333 | } else /* lowerEnd<upperStart */ { | |||
| 334 | if(incWeight(lowerEnd, length)==upperStart) { | |||
| 335 | // Merge adjacent ranges. | |||
| 336 | lower[length].end=upper[length].end; | |||
| 337 | lower[length].count+=upper[length].count; // might be >countBytes | |||
| 338 | merged=true; | |||
| 339 | } | |||
| 340 | } | |||
| 341 | if(merged) { | |||
| 342 | // Remove all shorter ranges. | |||
| 343 | // There was no room available for them between the ranges we just merged. | |||
| 344 | upper[length].count=0; | |||
| 345 | while(--length>middleLength) { | |||
| 346 | lower[length].count=upper[length].count=0; | |||
| 347 | } | |||
| 348 | break; | |||
| 349 | } | |||
| 350 | } | |||
| 351 | } | |||
| 352 | } | |||
| 353 | ||||
| 354 | #ifdef UCOL_DEBUG | |||
| 355 | /* print ranges */ | |||
| 356 | for(int32_t length=4; length>=2; --length) { | |||
| 357 | if(lower[length].count>0) { | |||
| 358 | printf("lower[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, lower[length].start, lower[length].end, lower[length].count); | |||
| 359 | } | |||
| 360 | } | |||
| 361 | if(middle.count>0) { | |||
| 362 | printf("middle .start=0x%08lx .end=0x%08lx .count=%ld\n", middle.start, middle.end, middle.count); | |||
| 363 | } | |||
| 364 | for(int32_t length=2; length<=4; ++length) { | |||
| 365 | if(upper[length].count>0) { | |||
| 366 | printf("upper[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, upper[length].start, upper[length].end, upper[length].count); | |||
| 367 | } | |||
| 368 | } | |||
| 369 | #endif | |||
| 370 | ||||
| 371 | /* copy the ranges, shortest first, into the result array */ | |||
| 372 | rangeCount=0; | |||
| 373 | if(middle.count>0) { | |||
| 374 | uprv_memcpy(ranges, &middle, sizeof(WeightRange))do { clang diagnostic push
clang diagnostic ignored "-Waddress" (static_cast <bool> (ranges != __null) ? void (0) : __assert_fail ("ranges != __null", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); (static_cast <bool> (&middle != __null) ? void (0) : __assert_fail ("&middle != __null" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); clang diagnostic pop :: memcpy(ranges, &middle, sizeof (WeightRange)); } while (false); | |||
| 375 | rangeCount=1; | |||
| 376 | } | |||
| 377 | for(int32_t length=middleLength+1; length<=4; ++length) { | |||
| 378 | /* copy upper first so that later the middle range is more likely the first one to use */ | |||
| 379 | if(upper[length].count>0) { | |||
| 380 | uprv_memcpy(ranges+rangeCount, upper+length, sizeof(WeightRange))do { clang diagnostic push
clang diagnostic ignored "-Waddress" (static_cast <bool> (ranges+rangeCount != __null) ? void (0) : __assert_fail ("ranges+rangeCount != __null", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); ( static_cast <bool> (upper+length != __null) ? void (0) : __assert_fail ("upper+length != __null", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); clang diagnostic pop :: memcpy(ranges+rangeCount, upper+length, sizeof(WeightRange )); } while (false); | |||
| 381 | ++rangeCount; | |||
| 382 | } | |||
| 383 | if(lower[length].count>0) { | |||
| 384 | uprv_memcpy(ranges+rangeCount, lower+length, sizeof(WeightRange))do { clang diagnostic push
clang diagnostic ignored "-Waddress" (static_cast <bool> (ranges+rangeCount != __null) ? void (0) : __assert_fail ("ranges+rangeCount != __null", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); ( static_cast <bool> (lower+length != __null) ? void (0) : __assert_fail ("lower+length != __null", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); clang diagnostic pop :: memcpy(ranges+rangeCount, lower+length, sizeof(WeightRange )); } while (false); | |||
| 385 | ++rangeCount; | |||
| 386 | } | |||
| 387 | } | |||
| 388 | return rangeCount>0; | |||
| 389 | } | |||
| 390 | ||||
| 391 | UBool | |||
| 392 | CollationWeights::allocWeightsInShortRanges(int32_t n, int32_t minLength) { | |||
| 393 | // See if the first few minLength and minLength+1 ranges have enough weights. | |||
| 394 | for(int32_t i = 0; i < rangeCount && ranges[i].length <= (minLength + 1); ++i) { | |||
| 395 | if(n <= ranges[i].count) { | |||
| 396 | // Use the first few minLength and minLength+1 ranges. | |||
| 397 | if(ranges[i].length > minLength) { | |||
| 398 | // Reduce the number of weights from the last minLength+1 range | |||
| 399 | // which might sort before some minLength ranges, | |||
| 400 | // so that we use all weights in the minLength ranges. | |||
| 401 | ranges[i].count = n; | |||
| 402 | } | |||
| 403 | rangeCount = i + 1; | |||
| 404 | #ifdef UCOL_DEBUG | |||
| 405 | printf("take first %ld ranges\n", rangeCount); | |||
| 406 | #endif | |||
| 407 | ||||
| 408 | if(rangeCount>1) { | |||
| 409 | /* sort the ranges by weight values */ | |||
| 410 | UErrorCode errorCode=U_ZERO_ERROR; | |||
| 411 | uprv_sortArrayuprv_sortArray_77(ranges, rangeCount, sizeof(WeightRange), | |||
| 412 | compareRanges, nullptr, false, &errorCode); | |||
| 413 | /* ignore error code: we know that the internal sort function will not fail here */ | |||
| 414 | } | |||
| 415 | return true; | |||
| 416 | } | |||
| 417 | n -= ranges[i].count; // still >0 | |||
| 418 | } | |||
| 419 | return false; | |||
| 420 | } | |||
| 421 | ||||
| 422 | UBool | |||
| 423 | CollationWeights::allocWeightsInMinLengthRanges(int32_t n, int32_t minLength) { | |||
| 424 | // See if the minLength ranges have enough weights | |||
| 425 | // when we split one and lengthen the following ones. | |||
| 426 | int32_t count = 0; | |||
| 427 | int32_t minLengthRangeCount; | |||
| 428 | for(minLengthRangeCount = 0; | |||
| 429 | minLengthRangeCount < rangeCount && | |||
| 430 | ranges[minLengthRangeCount].length == minLength; | |||
| 431 | ++minLengthRangeCount) { | |||
| 432 | count += ranges[minLengthRangeCount].count; | |||
| 433 | } | |||
| 434 | ||||
| 435 | int32_t nextCountBytes = countBytes(minLength + 1); | |||
| 436 | if(n > count * nextCountBytes) { return false; } | |||
| 437 | ||||
| 438 | // Use the minLength ranges. Merge them, and then split again as necessary. | |||
| 439 | uint32_t start = ranges[0].start; | |||
| 440 | uint32_t end = ranges[0].end; | |||
| 441 | for(int32_t i = 1; i < minLengthRangeCount; ++i) { | |||
| 442 | if(ranges[i].start < start) { start = ranges[i].start; } | |||
| 443 | if(ranges[i].end > end) { end = ranges[i].end; } | |||
| 444 | } | |||
| 445 | ||||
| 446 | // Calculate how to split the range between minLength (count1) and minLength+1 (count2). | |||
| 447 | // Goal: | |||
| 448 | // count1 + count2 * nextCountBytes = n | |||
| 449 | // count1 + count2 = count | |||
| 450 | // These turn into | |||
| 451 | // (count - count2) + count2 * nextCountBytes = n | |||
| 452 | // and then into the following count1 & count2 computations. | |||
| 453 | int32_t count2 = (n - count) / (nextCountBytes - 1); // number of weights to be lengthened | |||
| 454 | int32_t count1 = count - count2; // number of minLength weights | |||
| 455 | if(count2 == 0 || (count1 + count2 * nextCountBytes) < n) { | |||
| 456 | // round up | |||
| 457 | ++count2; | |||
| 458 | --count1; | |||
| 459 | U_ASSERT((count1 + count2 * nextCountBytes) >= n)(static_cast <bool> ((count1 + count2 * nextCountBytes) >= n) ? void (0) : __assert_fail ("(count1 + count2 * nextCountBytes) >= n" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
| 460 | } | |||
| 461 | ||||
| 462 | ranges[0].start = start; | |||
| 463 | ||||
| 464 | if(count1 == 0) { | |||
| 465 | // Make one long range. | |||
| 466 | ranges[0].end = end; | |||
| 467 | ranges[0].count = count; | |||
| 468 | lengthenRange(ranges[0]); | |||
| 469 | rangeCount = 1; | |||
| 470 | } else { | |||
| 471 | // Split the range, lengthen the second part. | |||
| 472 | #ifdef UCOL_DEBUG | |||
| 473 | printf("split the range number %ld (out of %ld minLength ranges) by %ld:%ld\n", | |||
| 474 | splitRange, rangeCount, count1, count2); | |||
| 475 | #endif | |||
| 476 | ||||
| 477 | // Next start = start + count1. First end = 1 before that. | |||
| 478 | ranges[0].end = incWeightByOffset(start, minLength, count1 - 1); | |||
| 479 | ranges[0].count = count1; | |||
| 480 | ||||
| 481 | ranges[1].start = incWeight(ranges[0].end, minLength); | |||
| 482 | ranges[1].end = end; | |||
| 483 | ranges[1].length = minLength; // +1 when lengthened | |||
| 484 | ranges[1].count = count2; // *countBytes when lengthened | |||
| 485 | lengthenRange(ranges[1]); | |||
| 486 | rangeCount = 2; | |||
| 487 | } | |||
| 488 | return true; | |||
| 489 | } | |||
| 490 | ||||
| 491 | /* | |||
| 492 | * call getWeightRanges and then determine heuristically | |||
| 493 | * which ranges to use for a given number of weights between (excluding) | |||
| 494 | * two limits | |||
| 495 | */ | |||
| 496 | UBool | |||
| 497 | CollationWeights::allocWeights(uint32_t lowerLimit, uint32_t upperLimit, int32_t n) { | |||
| 498 | #ifdef UCOL_DEBUG | |||
| 499 | puts(""); | |||
| 500 | #endif | |||
| 501 | ||||
| 502 | if(!getWeightRanges(lowerLimit, upperLimit)) { | |||
| ||||
| 503 | #ifdef UCOL_DEBUG | |||
| 504 | printf("error: unable to get Weight ranges\n"); | |||
| 505 | #endif | |||
| 506 | return false; | |||
| 507 | } | |||
| 508 | ||||
| 509 | /* try until we find suitably large ranges */ | |||
| 510 | for(;;) { | |||
| 511 | /* get the smallest number of bytes in a range */ | |||
| 512 | int32_t minLength=ranges[0].length; | |||
| 513 | ||||
| 514 | if(allocWeightsInShortRanges(n, minLength)) { break; } | |||
| 515 | ||||
| 516 | if(minLength == 4) { | |||
| 517 | #ifdef UCOL_DEBUG | |||
| 518 | printf("error: the maximum number of %ld weights is insufficient for n=%ld\n", | |||
| 519 | minLengthCount, n); | |||
| 520 | #endif | |||
| 521 | return false; | |||
| 522 | } | |||
| 523 | ||||
| 524 | if(allocWeightsInMinLengthRanges(n, minLength)) { break; } | |||
| 525 | ||||
| 526 | /* no good match, lengthen all minLength ranges and iterate */ | |||
| 527 | #ifdef UCOL_DEBUG | |||
| 528 | printf("lengthen the short ranges from %ld bytes to %ld and iterate\n", minLength, minLength+1); | |||
| 529 | #endif | |||
| 530 | for(int32_t i=0; i<rangeCount && ranges[i].length==minLength; ++i) { | |||
| 531 | lengthenRange(ranges[i]); | |||
| 532 | } | |||
| 533 | } | |||
| 534 | ||||
| 535 | #ifdef UCOL_DEBUG | |||
| 536 | puts("final ranges:"); | |||
| 537 | for(int32_t i=0; i<rangeCount; ++i) { | |||
| 538 | printf("ranges[%ld] .start=0x%08lx .end=0x%08lx .length=%ld .count=%ld\n", | |||
| 539 | i, ranges[i].start, ranges[i].end, ranges[i].length, ranges[i].count); | |||
| 540 | } | |||
| 541 | #endif | |||
| 542 | ||||
| 543 | rangeIndex = 0; | |||
| 544 | return true; | |||
| 545 | } | |||
| 546 | ||||
| 547 | uint32_t | |||
| 548 | CollationWeights::nextWeight() { | |||
| 549 | if(rangeIndex >= rangeCount) { | |||
| 550 | return 0xffffffff; | |||
| 551 | } else { | |||
| 552 | /* get the next weight */ | |||
| 553 | WeightRange &range = ranges[rangeIndex]; | |||
| 554 | uint32_t weight = range.start; | |||
| 555 | if(--range.count == 0) { | |||
| 556 | /* this range is finished */ | |||
| 557 | ++rangeIndex; | |||
| 558 | } else { | |||
| 559 | /* increment the weight for the next value */ | |||
| 560 | range.start = incWeight(weight, range.length); | |||
| 561 | U_ASSERT(range.start <= range.end)(static_cast <bool> (range.start <= range.end) ? void (0) : __assert_fail ("range.start <= range.end", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
| 562 | } | |||
| 563 | ||||
| 564 | return weight; | |||
| 565 | } | |||
| 566 | } | |||
| 567 | ||||
| 568 | U_NAMESPACE_END} | |||
| 569 | ||||
| 570 | #endif /* #if !UCONFIG_NO_COLLATION */ |