clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name softfloat.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/third_party/rust/glslopt -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/root/firefox-clang/third_party/rust/glslopt -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/config/gcc_hidden.h -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -I glsl-optimizer/include -I glsl-optimizer/src/mesa -I glsl-optimizer/src/mapi -I glsl-optimizer/src/compiler -I glsl-optimizer/src/compiler/glsl -I glsl-optimizer/src/gallium/auxiliary -I glsl-optimizer/src/gallium/include -I glsl-optimizer/src -I glsl-optimizer/src/util -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -D MOZILLA_CONFIG_H -D __STDC_FORMAT_MACROS -D _GNU_SOURCE -D HAVE_ENDIAN_H -D HAVE_PTHREAD -D HAVE_TIMESPEC_GET -D MOZ_INCLUDE_MOZALLOC_H -D mozilla_throw_gcc_h -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=tautological-type-limit-compare -Wno-range-loop-analysis -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-unknown-warning-option -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-27-100320-3286336-1 -x c glsl-optimizer/src/util/softfloat.c
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | |
25 | |
26 | |
27 | |
28 | |
29 | |
30 | |
31 | |
32 | |
33 | |
34 | |
35 | |
36 | |
37 | |
38 | |
39 | |
40 | |
41 | |
42 | |
43 | |
44 | |
45 | |
46 | #include "rounding.h" |
47 | #include "bitscan.h" |
48 | #include "softfloat.h" |
49 | |
50 | #if defined(BIG_ENDIAN) |
51 | #define word_incr -1 |
52 | #define index_word(total, n) ((total) - 1 - (n)) |
53 | #define index_word_hi(total) 0 |
54 | #define index_word_lo(total) ((total) - 1) |
55 | #define index_multiword_hi(total, n) 0 |
56 | #define index_multiword_lo(total, n) ((total) - (n)) |
57 | #define index_multiword_hi_but(total, n) 0 |
58 | #define index_multiword_lo_but(total, n) (n) |
59 | #else |
60 | #define word_incr 1 |
61 | #define index_word(total, n) (n) |
62 | #define index_word_hi(total) ((total) - 1) |
63 | #define index_word_lo(total) 0 |
64 | #define index_multiword_hi(total, n) ((total) - (n)) |
65 | #define index_multiword_lo(total, n) 0 |
66 | #define index_multiword_hi_but(total, n) (n) |
67 | #define index_multiword_lo_but(total, n) 0 |
68 | #endif |
69 | |
70 | typedef union { double f; int64_t i; uint64_t u; } di_type; |
71 | typedef union { float f; int32_t i; uint32_t u; } fi_type; |
72 | |
73 | const uint8_t count_leading_zeros8[256] = { |
74 | 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, |
75 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
76 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
77 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
78 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
79 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
80 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
81 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
82 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
83 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
84 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
85 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
86 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
87 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
88 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
89 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
90 | }; |
91 | |
92 | |
93 | |
94 | |
95 | |
96 | |
97 | |
98 | |
99 | |
100 | static inline |
101 | uint64_t _mesa_short_shift_right_jam64(uint64_t a, uint8_t dist) |
102 | { |
103 | return a >> dist | ((a & (((uint64_t) 1 << dist) - 1)) != 0); |
104 | } |
105 | |
106 | |
107 | |
108 | |
109 | |
110 | |
111 | |
112 | |
113 | |
114 | |
115 | |
116 | |
117 | static inline |
118 | uint64_t _mesa_shift_right_jam64(uint64_t a, uint32_t dist) |
119 | { |
120 | return |
121 | (dist < 63) ? a >> dist | ((uint64_t) (a << (-dist & 63)) != 0) : (a != 0); |
122 | } |
123 | |
124 | |
125 | |
126 | |
127 | |
128 | |
129 | |
130 | |
131 | |
132 | |
133 | |
134 | |
135 | static inline |
136 | uint32_t _mesa_shift_right_jam32(uint32_t a, uint16_t dist) |
137 | { |
138 | return |
139 | (dist < 31) ? a >> dist | ((uint32_t) (a << (-dist & 31)) != 0) : (a != 0); |
140 | } |
141 | |
142 | |
143 | |
144 | |
145 | static inline |
146 | double _mesa_roundtozero_f64(int64_t s, int64_t e, int64_t m) |
147 | { |
148 | di_type result; |
149 | |
150 | if ((uint64_t) e >= 0x7fd) { |
151 | if (e < 0) { |
152 | m = _mesa_shift_right_jam64(m, -e); |
153 | e = 0; |
154 | } else if ((e > 0x7fd) || (0x8000000000000000 <= m)) { |
155 | e = 0x7ff; |
156 | m = 0; |
157 | result.u = (s << 63) + (e << 52) + m; |
158 | result.u -= 1; |
159 | return result.f; |
160 | } |
161 | } |
162 | |
163 | m >>= 10; |
164 | if (m == 0) |
165 | e = 0; |
166 | |
167 | result.u = (s << 63) + (e << 52) + m; |
168 | return result.f; |
169 | } |
170 | |
171 | |
172 | |
173 | |
174 | static inline |
175 | float _mesa_round_f32(int32_t s, int32_t e, int32_t m, bool rtz) |
176 | { |
177 | fi_type result; |
178 | uint8_t round_increment = rtz ? 0 : 0x40; |
179 | |
180 | if ((uint32_t) e >= 0xfd) { |
181 | if (e < 0) { |
182 | m = _mesa_shift_right_jam32(m, -e); |
183 | e = 0; |
184 | } else if ((e > 0xfd) || (0x80000000 <= m + round_increment)) { |
185 | e = 0xff; |
186 | m = 0; |
187 | result.u = (s << 31) + (e << 23) + m; |
188 | result.u -= !round_increment; |
189 | return result.f; |
190 | } |
191 | } |
192 | |
193 | uint8_t round_bits; |
194 | round_bits = m & 0x7f; |
195 | m = ((uint32_t) m + round_increment) >> 7; |
196 | m &= ~(uint32_t) (! (round_bits ^ 0x40) & !rtz); |
197 | if (m == 0) |
198 | e = 0; |
199 | |
200 | result.u = (s << 31) + (e << 23) + m; |
201 | return result.f; |
202 | } |
203 | |
204 | |
205 | |
206 | |
207 | static inline |
208 | uint16_t _mesa_roundtozero_f16(int16_t s, int16_t e, int16_t m) |
209 | { |
210 | if ((uint16_t) e >= 0x1d) { |
211 | if (e < 0) { |
212 | m = _mesa_shift_right_jam32(m, -e); |
213 | e = 0; |
214 | } else if ((e > 0x1d) || (0x8000 <= m)) { |
215 | e = 0x1f; |
216 | m = 0; |
217 | return (s << 15) + (e << 10) + m - 1; |
218 | } |
219 | } |
220 | |
221 | m >>= 4; |
222 | if (m == 0) |
223 | e = 0; |
224 | |
225 | return (s << 15) + (e << 10) + m; |
226 | } |
227 | |
228 | |
229 | |
230 | |
231 | |
232 | |
233 | |
234 | |
235 | |
236 | |
237 | |
238 | |
239 | static inline void |
240 | _mesa_short_shift_left_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) |
241 | { |
242 | uint8_t neg_dist; |
243 | unsigned index, last_index; |
244 | uint32_t part_word, a_word; |
245 | |
246 | neg_dist = -dist; |
247 | index = index_word_hi(size_words); |
248 | last_index = index_word_lo(size_words); |
249 | part_word = a[index] << dist; |
250 | while (index != last_index) { |
251 | a_word = a[index - word_incr]; |
252 | m_out[index] = part_word | a_word >> (neg_dist & 31); |
253 | index -= word_incr; |
254 | part_word = a_word << dist; |
255 | } |
256 | m_out[index] = part_word; |
257 | } |
258 | |
259 | |
260 | |
261 | |
262 | |
263 | |
264 | |
265 | |
266 | |
267 | |
268 | |
269 | |
270 | |
271 | static inline void |
272 | _mesa_shift_left_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out) |
273 | { |
274 | uint32_t word_dist; |
275 | uint8_t inner_dist; |
276 | uint8_t i; |
277 | |
278 | word_dist = dist >> 5; |
279 | if (word_dist < size_words) { |
280 | a += index_multiword_lo_but(size_words, word_dist); |
281 | inner_dist = dist & 31; |
282 | if (inner_dist) { |
283 | _mesa_short_shift_left_m(size_words - word_dist, a, inner_dist, |
284 | m_out + index_multiword_hi_but(size_words, word_dist)); |
285 | if (!word_dist) |
286 | return; |
287 | } else { |
288 | uint32_t *dest = m_out + index_word_hi(size_words); |
289 | a += index_word_hi(size_words - word_dist); |
290 | for (i = size_words - word_dist; i; --i) { |
291 | *dest = *a; |
292 | a -= word_incr; |
293 | dest -= word_incr; |
294 | } |
295 | } |
296 | m_out += index_multiword_lo(size_words, word_dist); |
297 | } else { |
298 | word_dist = size_words; |
299 | } |
300 | do { |
301 | *m_out++ = 0; |
302 | --word_dist; |
303 | } while (word_dist); |
304 | } |
305 | |
306 | |
307 | |
308 | |
309 | |
310 | |
311 | |
312 | |
313 | |
314 | |
315 | |
316 | |
317 | static inline void |
318 | _mesa_short_shift_right_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) |
319 | { |
320 | uint8_t neg_dist; |
321 | unsigned index, last_index; |
322 | uint32_t part_word, a_word; |
323 | |
324 | neg_dist = -dist; |
325 | index = index_word_lo(size_words); |
326 | last_index = index_word_hi(size_words); |
327 | part_word = a[index] >> dist; |
328 | while (index != last_index) { |
329 | a_word = a[index + word_incr]; |
330 | m_out[index] = a_word << (neg_dist & 31) | part_word; |
331 | index += word_incr; |
332 | part_word = a_word >> dist; |
333 | } |
334 | m_out[index] = part_word; |
335 | } |
336 | |
337 | |
338 | |
339 | |
340 | |
341 | |
342 | |
343 | |
344 | |
345 | |
346 | |
347 | |
348 | |
349 | |
350 | static inline void |
351 | _mesa_short_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) |
352 | { |
353 | uint8_t neg_dist; |
354 | unsigned index, last_index; |
355 | uint64_t part_word, a_word; |
356 | |
357 | neg_dist = -dist; |
358 | index = index_word_lo(size_words); |
359 | last_index = index_word_hi(size_words); |
360 | a_word = a[index]; |
361 | part_word = a_word >> dist; |
362 | if (part_word << dist != a_word ) |
363 | part_word |= 1; |
364 | while (index != last_index) { |
365 | a_word = a[index + word_incr]; |
366 | m_out[index] = a_word << (neg_dist & 31) | part_word; |
367 | index += word_incr; |
368 | part_word = a_word >> dist; |
369 | } |
370 | m_out[index] = part_word; |
371 | } |
372 | |
373 | |
374 | |
375 | |
376 | |
377 | |
378 | |
379 | |
380 | |
381 | |
382 | |
383 | |
384 | |
385 | |
386 | |
387 | |
388 | static inline void |
389 | _mesa_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out) |
390 | { |
391 | uint32_t word_jam, word_dist, *tmp; |
| 1 | 'tmp' declared without an initial value | |
|
392 | uint8_t i, inner_dist; |
393 | |
394 | word_jam = 0; |
395 | word_dist = dist >> 5; |
396 | if (word_dist) { |
| 2 | | Assuming 'word_dist' is 0 | |
|
| |
397 | if (size_words < word_dist) |
398 | word_dist = size_words; |
399 | tmp = (uint32_t *) (a + index_multiword_lo(size_words, word_dist)); |
400 | i = word_dist; |
401 | do { |
402 | word_jam = *tmp++; |
403 | if (word_jam) |
404 | break; |
405 | --i; |
406 | } while (i); |
407 | tmp = m_out; |
408 | } |
409 | if (word_dist < size_words) { |
| 4 | | Assuming 'word_dist' is >= 'size_words' | |
|
| |
410 | a += index_multiword_hi_but(size_words, word_dist); |
411 | inner_dist = dist & 31; |
412 | if (inner_dist) { |
413 | _mesa_short_shift_right_jam_m(size_words - word_dist, a, inner_dist, |
414 | m_out + index_multiword_lo_but(size_words, word_dist)); |
415 | if (!word_dist) { |
416 | if (word_jam) |
417 | m_out[index_word_lo(size_words)] |= 1; |
418 | return; |
419 | } |
420 | } else { |
421 | a += index_word_lo(size_words - word_dist); |
422 | tmp = m_out + index_word_lo(size_words); |
423 | for (i = size_words - word_dist; i; --i) { |
424 | *tmp = *a; |
425 | a += word_incr; |
426 | tmp += word_incr; |
427 | } |
428 | } |
429 | tmp = m_out + index_multiword_hi(size_words, word_dist); |
430 | } |
431 | do { |
432 | *tmp++ = 0; |
| 6 | | The expression uses uninitialized memory |
|
433 | --word_dist; |
434 | } while (word_dist); |
435 | if (word_jam) |
436 | m_out[index_word_lo(size_words)] |= 1; |
437 | } |
438 | |
439 | |
440 | |
441 | |
442 | |
443 | |
444 | |
445 | |
446 | |
447 | |
448 | |
449 | double |
450 | _mesa_double_add_rtz(double a, double b) |
451 | { |
452 | const di_type a_di = {a}; |
453 | uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; |
454 | uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; |
455 | uint64_t a_flt_s = (a_di.u >> 63) & 0x1; |
456 | const di_type b_di = {b}; |
457 | uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; |
458 | uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; |
459 | uint64_t b_flt_s = (b_di.u >> 63) & 0x1; |
460 | int64_t s, e, m = 0; |
461 | |
462 | s = a_flt_s; |
463 | |
464 | const int64_t exp_diff = a_flt_e - b_flt_e; |
465 | |
466 | |
467 | |
468 | if (a_flt_s != b_flt_s) { |
469 | return _mesa_double_sub_rtz(a, -b); |
470 | } else if ((a_flt_e == 0) && (a_flt_m == 0)) { |
471 | |
472 | return b; |
473 | } else if ((b_flt_e == 0) && (b_flt_m == 0)) { |
474 | |
475 | return a; |
476 | } else if (a_flt_e == 0x7ff && a_flt_m != 0) { |
477 | |
478 | return a; |
479 | } else if (b_flt_e == 0x7ff && b_flt_m != 0) { |
480 | |
481 | return b; |
482 | } else if (a_flt_e == 0x7ff && a_flt_m == 0) { |
483 | |
484 | return a; |
485 | } else if (b_flt_e == 0x7ff && b_flt_m == 0) { |
486 | |
487 | return b; |
488 | } else if (exp_diff == 0 && a_flt_e == 0) { |
489 | di_type result_di; |
490 | result_di.u = a_di.u + b_flt_m; |
491 | return result_di.f; |
492 | } else if (exp_diff == 0) { |
493 | e = a_flt_e; |
494 | m = 0x0020000000000000 + a_flt_m + b_flt_m; |
495 | m <<= 9; |
496 | } else if (exp_diff < 0) { |
497 | a_flt_m <<= 9; |
498 | b_flt_m <<= 9; |
499 | e = b_flt_e; |
500 | |
501 | if (a_flt_e != 0) |
502 | a_flt_m += 0x2000000000000000; |
503 | else |
504 | a_flt_m <<= 1; |
505 | |
506 | a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff); |
507 | m = 0x2000000000000000 + a_flt_m + b_flt_m; |
508 | if (m < 0x4000000000000000) { |
509 | --e; |
510 | m <<= 1; |
511 | } |
512 | } else { |
513 | a_flt_m <<= 9; |
514 | b_flt_m <<= 9; |
515 | e = a_flt_e; |
516 | |
517 | if (b_flt_e != 0) |
518 | b_flt_m += 0x2000000000000000; |
519 | else |
520 | b_flt_m <<= 1; |
521 | |
522 | b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff); |
523 | m = 0x2000000000000000 + a_flt_m + b_flt_m; |
524 | if (m < 0x4000000000000000) { |
525 | --e; |
526 | m <<= 1; |
527 | } |
528 | } |
529 | |
530 | return _mesa_roundtozero_f64(s, e, m); |
531 | } |
532 | |
533 | |
534 | |
535 | |
536 | |
537 | static inline unsigned |
538 | _mesa_count_leading_zeros64(uint64_t a) |
539 | { |
540 | return 64 - util_last_bit64(a); |
541 | } |
542 | |
543 | |
544 | |
545 | |
546 | |
547 | static inline unsigned |
548 | _mesa_count_leading_zeros32(uint32_t a) |
549 | { |
550 | return 32 - util_last_bit(a); |
551 | } |
552 | |
553 | static inline double |
554 | _mesa_norm_round_pack_f64(int64_t s, int64_t e, int64_t m) |
555 | { |
556 | int8_t shift_dist; |
557 | |
558 | shift_dist = _mesa_count_leading_zeros64(m) - 1; |
559 | e -= shift_dist; |
560 | if ((10 <= shift_dist) && ((unsigned) e < 0x7fd)) { |
561 | di_type result; |
562 | result.u = (s << 63) + ((m ? e : 0) << 52) + (m << (shift_dist - 10)); |
563 | return result.f; |
564 | } else { |
565 | return _mesa_roundtozero_f64(s, e, m << shift_dist); |
566 | } |
567 | } |
568 | |
569 | |
570 | |
571 | |
572 | |
573 | |
574 | |
575 | |
576 | |
577 | static inline void |
578 | _mesa_neg_x_m(uint8_t size_words, uint32_t *m_out) |
579 | { |
580 | unsigned index, last_index; |
581 | uint8_t carry; |
582 | uint32_t word; |
583 | |
584 | index = index_word_lo(size_words); |
585 | last_index = index_word_hi(size_words); |
586 | carry = 1; |
587 | for (;;) { |
588 | word = ~m_out[index] + carry; |
589 | m_out[index] = word; |
590 | if (index == last_index) |
591 | break; |
592 | index += word_incr; |
593 | if (word) |
594 | carry = 0; |
595 | } |
596 | } |
597 | |
598 | |
599 | |
600 | |
601 | |
602 | |
603 | |
604 | |
605 | |
606 | |
607 | |
608 | static inline void |
609 | _mesa_add_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out) |
610 | { |
611 | unsigned index, last_index; |
612 | uint8_t carry; |
613 | uint32_t a_word, word; |
614 | |
615 | index = index_word_lo(size_words); |
616 | last_index = index_word_hi(size_words); |
617 | carry = 0; |
618 | for (;;) { |
619 | a_word = a[index]; |
620 | word = a_word + b[index] + carry; |
621 | m_out[index] = word; |
622 | if (index == last_index) |
623 | break; |
624 | if (word != a_word) |
625 | carry = (word < a_word); |
626 | index += word_incr; |
627 | } |
628 | } |
629 | |
630 | |
631 | |
632 | |
633 | |
634 | |
635 | |
636 | |
637 | |
638 | |
639 | |
640 | static inline void |
641 | _mesa_sub_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out) |
642 | { |
643 | unsigned index, last_index; |
644 | uint8_t borrow; |
645 | uint32_t a_word, b_word; |
646 | |
647 | index = index_word_lo(size_words); |
648 | last_index = index_word_hi(size_words); |
649 | borrow = 0; |
650 | for (;;) { |
651 | a_word = a[index]; |
652 | b_word = b[index]; |
653 | m_out[index] = a_word - b_word - borrow; |
654 | if (index == last_index) |
655 | break; |
656 | borrow = borrow ? (a_word <= b_word) : (a_word < b_word); |
657 | index += word_incr; |
658 | } |
659 | } |
660 | |
661 | |
662 | |
663 | |
664 | |
665 | |
666 | |
667 | |
668 | |
669 | |
670 | double |
671 | _mesa_double_sub_rtz(double a, double b) |
672 | { |
673 | const di_type a_di = {a}; |
674 | uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; |
675 | uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; |
676 | uint64_t a_flt_s = (a_di.u >> 63) & 0x1; |
677 | const di_type b_di = {b}; |
678 | uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; |
679 | uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; |
680 | uint64_t b_flt_s = (b_di.u >> 63) & 0x1; |
681 | int64_t s, e, m = 0; |
682 | int64_t m_diff = 0; |
683 | unsigned shift_dist = 0; |
684 | |
685 | s = a_flt_s; |
686 | |
687 | const int64_t exp_diff = a_flt_e - b_flt_e; |
688 | |
689 | |
690 | |
691 | if (a_flt_s != b_flt_s) { |
692 | return _mesa_double_add_rtz(a, -b); |
693 | } else if ((a_flt_e == 0) && (a_flt_m == 0)) { |
694 | |
695 | return -b; |
696 | } else if ((b_flt_e == 0) && (b_flt_m == 0)) { |
697 | |
698 | return a; |
699 | } else if (a_flt_e == 0x7ff && a_flt_m != 0) { |
700 | |
701 | return a; |
702 | } else if (b_flt_e == 0x7ff && b_flt_m != 0) { |
703 | |
704 | return b; |
705 | } else if (a_flt_e == 0x7ff && a_flt_m == 0) { |
706 | if (b_flt_e == 0x7ff && b_flt_m == 0) { |
707 | |
708 | di_type result; |
709 | e = 0x7ff; |
710 | result.u = (s << 63) + (e << 52) + 0x1; |
711 | return result.f; |
712 | } |
713 | |
714 | return a; |
715 | } else if (b_flt_e == 0x7ff && b_flt_m == 0) { |
716 | |
717 | return -b; |
718 | } else if (exp_diff == 0) { |
719 | m_diff = a_flt_m - b_flt_m; |
720 | |
721 | if (m_diff == 0) |
722 | return 0; |
723 | if (a_flt_e) |
724 | --a_flt_e; |
725 | if (m_diff < 0) { |
726 | s = !s; |
727 | m_diff = -m_diff; |
728 | } |
729 | |
730 | shift_dist = _mesa_count_leading_zeros64(m_diff) - 11; |
731 | e = a_flt_e - shift_dist; |
732 | if (e < 0) { |
733 | shift_dist = a_flt_e; |
734 | e = 0; |
735 | } |
736 | |
737 | di_type result; |
738 | result.u = (s << 63) + (e << 52) + (m_diff << shift_dist); |
739 | return result.f; |
740 | } else if (exp_diff < 0) { |
741 | a_flt_m <<= 10; |
742 | b_flt_m <<= 10; |
743 | s = !s; |
744 | |
745 | a_flt_m += (a_flt_e) ? 0x4000000000000000 : a_flt_m; |
746 | a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff); |
747 | b_flt_m |= 0x4000000000000000; |
748 | e = b_flt_e; |
749 | m = b_flt_m - a_flt_m; |
750 | } else { |
751 | a_flt_m <<= 10; |
752 | b_flt_m <<= 10; |
753 | |
754 | b_flt_m += (b_flt_e) ? 0x4000000000000000 : b_flt_m; |
755 | b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff); |
756 | a_flt_m |= 0x4000000000000000; |
757 | e = a_flt_e; |
758 | m = a_flt_m - b_flt_m; |
759 | } |
760 | |
761 | return _mesa_norm_round_pack_f64(s, e - 1, m); |
762 | } |
763 | |
764 | static inline void |
765 | _mesa_norm_subnormal_mantissa_f64(uint64_t m, uint64_t *exp, uint64_t *m_out) |
766 | { |
767 | int shift_dist; |
768 | |
769 | shift_dist = _mesa_count_leading_zeros64(m) - 11; |
770 | *exp = 1 - shift_dist; |
771 | *m_out = m << shift_dist; |
772 | } |
773 | |
774 | static inline void |
775 | _mesa_norm_subnormal_mantissa_f32(uint32_t m, uint32_t *exp, uint32_t *m_out) |
776 | { |
777 | int shift_dist; |
778 | |
779 | shift_dist = _mesa_count_leading_zeros32(m) - 8; |
780 | *exp = 1 - shift_dist; |
781 | *m_out = m << shift_dist; |
782 | } |
783 | |
784 | |
785 | |
786 | |
787 | |
788 | |
789 | |
790 | |
791 | |
792 | static inline void |
793 | _mesa_softfloat_mul_f64_to_f128_m(uint64_t a, uint64_t b, uint32_t *m_out) |
794 | { |
795 | uint32_t a32, a0, b32, b0; |
796 | uint64_t z0, mid1, z64, mid; |
797 | |
798 | a32 = a >> 32; |
799 | a0 = a; |
800 | b32 = b >> 32; |
801 | b0 = b; |
802 | z0 = (uint64_t) a0 * b0; |
803 | mid1 = (uint64_t) a32 * b0; |
804 | mid = mid1 + (uint64_t) a0 * b32; |
805 | z64 = (uint64_t) a32 * b32; |
806 | z64 += (uint64_t) (mid < mid1) << 32 | mid >> 32; |
807 | mid <<= 32; |
808 | z0 += mid; |
809 | m_out[index_word(4, 1)] = z0 >> 32; |
810 | m_out[index_word(4, 0)] = z0; |
811 | z64 += (z0 < mid); |
812 | m_out[index_word(4, 3)] = z64 >> 32; |
813 | m_out[index_word(4, 2)] = z64; |
814 | } |
815 | |
816 | |
817 | |
818 | |
819 | |
820 | |
821 | |
822 | |
823 | |
824 | |
825 | double |
826 | _mesa_double_mul_rtz(double a, double b) |
827 | { |
828 | const di_type a_di = {a}; |
829 | uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; |
830 | uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; |
831 | uint64_t a_flt_s = (a_di.u >> 63) & 0x1; |
832 | const di_type b_di = {b}; |
833 | uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; |
834 | uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; |
835 | uint64_t b_flt_s = (b_di.u >> 63) & 0x1; |
836 | int64_t s, e, m = 0; |
837 | |
838 | s = a_flt_s ^ b_flt_s; |
839 | |
840 | if (a_flt_e == 0x7ff) { |
841 | if (a_flt_m != 0) { |
842 | |
843 | return a; |
844 | } else if (b_flt_e == 0x7ff && b_flt_m != 0) { |
845 | |
846 | return b; |
847 | } |
848 | |
849 | if (!(b_flt_e | b_flt_m)) { |
850 | |
851 | di_type result; |
852 | e = 0x7ff; |
853 | result.u = (s << 63) + (e << 52) + 0x1; |
854 | return result.f; |
855 | } |
856 | |
857 | di_type result; |
858 | e = 0x7ff; |
859 | result.u = (s << 63) + (e << 52) + 0; |
860 | return result.f; |
861 | } |
862 | |
863 | if (b_flt_e == 0x7ff) { |
864 | if (b_flt_m != 0) { |
865 | |
866 | return b; |
867 | } |
868 | if (!(a_flt_e | a_flt_m)) { |
869 | |
870 | di_type result; |
871 | e = 0x7ff; |
872 | result.u = (s << 63) + (e << 52) + 0x1; |
873 | return result.f; |
874 | } |
875 | |
876 | di_type result; |
877 | e = 0x7ff; |
878 | result.u = (s << 63) + (e << 52) + 0; |
879 | return result.f; |
880 | } |
881 | |
882 | if (a_flt_e == 0) { |
883 | if (a_flt_m == 0) { |
884 | |
885 | di_type result; |
886 | result.u = (s << 63) + 0; |
887 | return result.f; |
888 | } |
889 | _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m); |
890 | } |
891 | if (b_flt_e == 0) { |
892 | if (b_flt_m == 0) { |
893 | |
894 | di_type result; |
895 | result.u = (s << 63) + 0; |
896 | return result.f; |
897 | } |
898 | _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m); |
899 | } |
900 | |
901 | e = a_flt_e + b_flt_e - 0x3ff; |
902 | a_flt_m = (a_flt_m | 0x0010000000000000) << 10; |
903 | b_flt_m = (b_flt_m | 0x0010000000000000) << 11; |
904 | |
905 | uint32_t m_128[4]; |
906 | _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128); |
907 | |
908 | m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)]; |
909 | if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) |
910 | m |= 1; |
911 | |
912 | if (m < 0x4000000000000000) { |
913 | --e; |
914 | m <<= 1; |
915 | } |
916 | |
917 | return _mesa_roundtozero_f64(s, e, m); |
918 | } |
919 | |
920 | |
921 | |
922 | |
923 | |
924 | |
925 | |
926 | |
927 | |
928 | |
929 | |
930 | |
931 | double |
932 | _mesa_double_fma_rtz(double a, double b, double c) |
933 | { |
934 | const di_type a_di = {a}; |
935 | uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; |
936 | uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; |
937 | uint64_t a_flt_s = (a_di.u >> 63) & 0x1; |
938 | const di_type b_di = {b}; |
939 | uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; |
940 | uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; |
941 | uint64_t b_flt_s = (b_di.u >> 63) & 0x1; |
942 | const di_type c_di = {c}; |
943 | uint64_t c_flt_m = c_di.u & 0x0fffffffffffff; |
944 | uint64_t c_flt_e = (c_di.u >> 52) & 0x7ff; |
945 | uint64_t c_flt_s = (c_di.u >> 63) & 0x1; |
946 | int64_t s, e, m = 0; |
947 | |
948 | c_flt_s ^= 0; |
949 | s = a_flt_s ^ b_flt_s ^ 0; |
950 | |
951 | if (a_flt_e == 0x7ff) { |
952 | if (a_flt_m != 0) { |
953 | |
954 | return a; |
955 | } else if (b_flt_e == 0x7ff && b_flt_m != 0) { |
956 | |
957 | return b; |
958 | } else if (c_flt_e == 0x7ff && c_flt_m != 0) { |
959 | |
960 | return c; |
961 | } |
962 | |
963 | if (!(b_flt_e | b_flt_m)) { |
964 | |
965 | di_type result; |
966 | e = 0x7ff; |
967 | result.u = (s << 63) + (e << 52) + 0x1; |
968 | return result.f; |
969 | } |
970 | |
971 | if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) { |
972 | |
973 | di_type result; |
974 | e = 0x7ff; |
975 | result.u = (s << 63) + (e << 52) + 0x1; |
976 | return result.f; |
977 | } |
978 | |
979 | |
980 | di_type result; |
981 | e = 0x7ff; |
982 | result.u = (s << 63) + (e << 52) + 0; |
983 | return result.f; |
984 | } |
985 | |
986 | if (b_flt_e == 0x7ff) { |
987 | if (b_flt_m != 0) { |
988 | |
989 | return b; |
990 | } else if (c_flt_e == 0x7ff && c_flt_m != 0) { |
991 | |
992 | return c; |
993 | } |
994 | |
995 | if (!(a_flt_e | a_flt_m)) { |
996 | |
997 | di_type result; |
998 | e = 0x7ff; |
999 | result.u = (s << 63) + (e << 52) + 0x1; |
1000 | return result.f; |
1001 | } |
1002 | |
1003 | if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) { |
1004 | |
1005 | di_type result; |
1006 | e = 0x7ff; |
1007 | result.u = (s << 63) + (e << 52) + 0x1; |
1008 | return result.f; |
1009 | } |
1010 | |
1011 | |
1012 | di_type result; |
1013 | e = 0x7ff; |
1014 | result.u = (s << 63) + (e << 52) + 0; |
1015 | return result.f; |
1016 | } |
1017 | |
1018 | if (c_flt_e == 0x7ff) { |
1019 | if (c_flt_m != 0) { |
1020 | |
1021 | return c; |
1022 | } |
1023 | |
1024 | |
1025 | return c; |
1026 | } |
1027 | |
1028 | if (a_flt_e == 0) { |
1029 | if (a_flt_m == 0) { |
1030 | |
1031 | return c; |
1032 | } |
1033 | _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m); |
1034 | } |
1035 | |
1036 | if (b_flt_e == 0) { |
1037 | if (b_flt_m == 0) { |
1038 | |
1039 | return c; |
1040 | } |
1041 | _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m); |
1042 | } |
1043 | |
1044 | e = a_flt_e + b_flt_e - 0x3fe; |
1045 | a_flt_m = (a_flt_m | 0x0010000000000000) << 10; |
1046 | b_flt_m = (b_flt_m | 0x0010000000000000) << 11; |
1047 | |
1048 | uint32_t m_128[4]; |
1049 | _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128); |
1050 | |
1051 | m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)]; |
1052 | |
1053 | int64_t shift_dist = 0; |
1054 | if (!(m & 0x4000000000000000)) { |
1055 | --e; |
1056 | shift_dist = -1; |
1057 | } |
1058 | |
1059 | if (c_flt_e == 0) { |
1060 | if (c_flt_m == 0) { |
1061 | |
1062 | if (shift_dist) |
1063 | m <<= 1; |
1064 | |
1065 | if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) |
1066 | m |= 1; |
1067 | return _mesa_roundtozero_f64(s, e - 1, m); |
1068 | } |
1069 | _mesa_norm_subnormal_mantissa_f64(c_flt_m , &c_flt_e, &c_flt_m); |
1070 | } |
1071 | c_flt_m = (c_flt_m | 0x0010000000000000) << 10; |
1072 | |
1073 | uint32_t c_flt_m_128[4]; |
1074 | int64_t exp_diff = e - c_flt_e; |
1075 | if (exp_diff < 0) { |
1076 | e = c_flt_e; |
1077 | if ((s == c_flt_s) || (exp_diff < -1)) { |
1078 | shift_dist -= exp_diff; |
1079 | if (shift_dist) { |
1080 | m = _mesa_shift_right_jam64(m, shift_dist); |
1081 | } |
1082 | } else { |
1083 | if (!shift_dist) { |
1084 | _mesa_short_shift_right_m(4, m_128, 1, m_128); |
1085 | } |
1086 | } |
1087 | } else { |
1088 | if (shift_dist) |
1089 | _mesa_add_m(4, m_128, m_128, m_128); |
1090 | if (!exp_diff) { |
1091 | m = (uint64_t) m_128[index_word(4, 3)] << 32 |
1092 | | m_128[index_word(4, 2)]; |
1093 | } else { |
1094 | c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32; |
1095 | c_flt_m_128[index_word(4, 2)] = c_flt_m; |
1096 | c_flt_m_128[index_word(4, 1)] = 0; |
1097 | c_flt_m_128[index_word(4, 0)] = 0; |
1098 | _mesa_shift_right_jam_m(4, c_flt_m_128, exp_diff, c_flt_m_128); |
1099 | } |
1100 | } |
1101 | |
1102 | if (s == c_flt_s) { |
1103 | if (exp_diff <= 0) { |
1104 | m += c_flt_m; |
1105 | } else { |
1106 | _mesa_add_m(4, m_128, c_flt_m_128, m_128); |
1107 | m = (uint64_t) m_128[index_word(4, 3)] << 32 |
1108 | | m_128[index_word(4, 2)]; |
1109 | } |
1110 | if (m & 0x8000000000000000) { |
1111 | e++; |
1112 | m = _mesa_short_shift_right_jam64(m, 1); |
1113 | } |
1114 | } else { |
1115 | if (exp_diff < 0) { |
1116 | s = c_flt_s; |
1117 | if (exp_diff < -1) { |
1118 | m = c_flt_m - m; |
1119 | if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) { |
1120 | m = (m - 1) | 1; |
1121 | } |
1122 | if (!(m & 0x4000000000000000)) { |
1123 | --e; |
1124 | m <<= 1; |
1125 | } |
1126 | return _mesa_roundtozero_f64(s, e - 1, m); |
1127 | } else { |
1128 | c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32; |
1129 | c_flt_m_128[index_word(4, 2)] = c_flt_m; |
1130 | c_flt_m_128[index_word(4, 1)] = 0; |
1131 | c_flt_m_128[index_word(4, 0)] = 0; |
1132 | _mesa_sub_m(4, c_flt_m_128, m_128, m_128); |
1133 | } |
1134 | } else if (!exp_diff) { |
1135 | m -= c_flt_m; |
1136 | if (!m && !m_128[index_word(4, 1)] && !m_128[index_word(4, 0)]) { |
1137 | |
1138 | di_type result; |
1139 | result.u = (s << 63) + 0; |
1140 | return result.f; |
1141 | } |
1142 | m_128[index_word(4, 3)] = m >> 32; |
1143 | m_128[index_word(4, 2)] = m; |
1144 | if (m & 0x8000000000000000) { |
1145 | s = !s; |
1146 | _mesa_neg_x_m(4, m_128); |
1147 | } |
1148 | } else { |
1149 | _mesa_sub_m(4, m_128, c_flt_m_128, m_128); |
1150 | if (1 < exp_diff) { |
1151 | m = (uint64_t) m_128[index_word(4, 3)] << 32 |
1152 | | m_128[index_word(4, 2)]; |
1153 | if (!(m & 0x4000000000000000)) { |
1154 | --e; |
1155 | m <<= 1; |
1156 | } |
1157 | if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) |
1158 | m |= 1; |
1159 | return _mesa_roundtozero_f64(s, e - 1, m); |
1160 | } |
1161 | } |
1162 | |
1163 | shift_dist = 0; |
1164 | m = (uint64_t) m_128[index_word(4, 3)] << 32 |
1165 | | m_128[index_word(4, 2)]; |
1166 | if (!m) { |
1167 | shift_dist = 64; |
1168 | m = (uint64_t) m_128[index_word(4, 1)] << 32 |
1169 | | m_128[index_word(4, 0)]; |
1170 | } |
1171 | shift_dist += _mesa_count_leading_zeros64(m) - 1; |
1172 | if (shift_dist) { |
1173 | e -= shift_dist; |
1174 | _mesa_shift_left_m(4, m_128, shift_dist, m_128); |
1175 | m = (uint64_t) m_128[index_word(4, 3)] << 32 |
1176 | | m_128[index_word(4, 2)]; |
1177 | } |
1178 | } |
1179 | |
1180 | if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) |
1181 | m |= 1; |
1182 | return _mesa_roundtozero_f64(s, e - 1, m); |
1183 | } |
1184 | |
1185 | |
1186 | |
1187 | |
1188 | |
1189 | |
1190 | |
1191 | |
1192 | |
1193 | |
1194 | |
1195 | |
1196 | float |
1197 | _mesa_float_fma_rtz(float a, float b, float c) |
1198 | { |
1199 | const fi_type a_fi = {a}; |
1200 | uint32_t a_flt_m = a_fi.u & 0x07fffff; |
1201 | uint32_t a_flt_e = (a_fi.u >> 23) & 0xff; |
1202 | uint32_t a_flt_s = (a_fi.u >> 31) & 0x1; |
1203 | const fi_type b_fi = {b}; |
1204 | uint32_t b_flt_m = b_fi.u & 0x07fffff; |
1205 | uint32_t b_flt_e = (b_fi.u >> 23) & 0xff; |
1206 | uint32_t b_flt_s = (b_fi.u >> 31) & 0x1; |
1207 | const fi_type c_fi = {c}; |
1208 | uint32_t c_flt_m = c_fi.u & 0x07fffff; |
1209 | uint32_t c_flt_e = (c_fi.u >> 23) & 0xff; |
1210 | uint32_t c_flt_s = (c_fi.u >> 31) & 0x1; |
1211 | int32_t s, e, m = 0; |
1212 | |
1213 | c_flt_s ^= 0; |
1214 | s = a_flt_s ^ b_flt_s ^ 0; |
1215 | |
1216 | if (a_flt_e == 0xff) { |
1217 | if (a_flt_m != 0) { |
1218 | |
1219 | return a; |
1220 | } else if (b_flt_e == 0xff && b_flt_m != 0) { |
1221 | |
1222 | return b; |
1223 | } else if (c_flt_e == 0xff && c_flt_m != 0) { |
1224 | |
1225 | return c; |
1226 | } |
1227 | |
1228 | if (!(b_flt_e | b_flt_m)) { |
1229 | |
1230 | fi_type result; |
1231 | e = 0xff; |
1232 | result.u = (s << 31) + (e << 23) + 0x1; |
1233 | return result.f; |
1234 | } |
1235 | |
1236 | if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) { |
1237 | |
1238 | fi_type result; |
1239 | e = 0xff; |
1240 | result.u = (s << 31) + (e << 23) + 0x1; |
1241 | return result.f; |
1242 | } |
1243 | |
1244 | |
1245 | fi_type result; |
1246 | e = 0xff; |
1247 | result.u = (s << 31) + (e << 23) + 0; |
1248 | return result.f; |
1249 | } |
1250 | |
1251 | if (b_flt_e == 0xff) { |
1252 | if (b_flt_m != 0) { |
1253 | |
1254 | return b; |
1255 | } else if (c_flt_e == 0xff && c_flt_m != 0) { |
1256 | |
1257 | return c; |
1258 | } |
1259 | |
1260 | if (!(a_flt_e | a_flt_m)) { |
1261 | |
1262 | fi_type result; |
1263 | e = 0xff; |
1264 | result.u = (s << 31) + (e << 23) + 0x1; |
1265 | return result.f; |
1266 | } |
1267 | |
1268 | if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) { |
1269 | |
1270 | fi_type result; |
1271 | e = 0xff; |
1272 | result.u = (s << 31) + (e << 23) + 0x1; |
1273 | return result.f; |
1274 | } |
1275 | |
1276 | |
1277 | fi_type result; |
1278 | e = 0xff; |
1279 | result.u = (s << 31) + (e << 23) + 0; |
1280 | return result.f; |
1281 | } |
1282 | |
1283 | if (c_flt_e == 0xff) { |
1284 | if (c_flt_m != 0) { |
1285 | |
1286 | return c; |
1287 | } |
1288 | |
1289 | |
1290 | return c; |
1291 | } |
1292 | |
1293 | if (a_flt_e == 0) { |
1294 | if (a_flt_m == 0) { |
1295 | |
1296 | return c; |
1297 | } |
1298 | _mesa_norm_subnormal_mantissa_f32(a_flt_m , &a_flt_e, &a_flt_m); |
1299 | } |
1300 | |
1301 | if (b_flt_e == 0) { |
1302 | if (b_flt_m == 0) { |
1303 | |
1304 | return c; |
1305 | } |
1306 | _mesa_norm_subnormal_mantissa_f32(b_flt_m , &b_flt_e, &b_flt_m); |
1307 | } |
1308 | |
1309 | e = a_flt_e + b_flt_e - 0x7e; |
1310 | a_flt_m = (a_flt_m | 0x00800000) << 7; |
1311 | b_flt_m = (b_flt_m | 0x00800000) << 7; |
1312 | |
1313 | uint64_t m_64 = (uint64_t) a_flt_m * b_flt_m; |
1314 | if (m_64 < 0x2000000000000000) { |
1315 | --e; |
1316 | m_64 <<= 1; |
1317 | } |
1318 | |
1319 | if (c_flt_e == 0) { |
1320 | if (c_flt_m == 0) { |
1321 | |
1322 | m = _mesa_short_shift_right_jam64(m_64, 31); |
1323 | return _mesa_round_f32(s, e - 1, m, true); |
1324 | } |
1325 | _mesa_norm_subnormal_mantissa_f32(c_flt_m , &c_flt_e, &c_flt_m); |
1326 | } |
1327 | c_flt_m = (c_flt_m | 0x00800000) << 6; |
1328 | |
1329 | int16_t exp_diff = e - c_flt_e; |
1330 | if (s == c_flt_s) { |
1331 | if (exp_diff <= 0) { |
1332 | e = c_flt_e; |
1333 | m = c_flt_m + _mesa_shift_right_jam64(m_64, 32 - exp_diff); |
1334 | } else { |
1335 | m_64 += _mesa_shift_right_jam64((uint64_t) c_flt_m << 32, exp_diff); |
1336 | m = _mesa_short_shift_right_jam64(m_64, 32); |
1337 | } |
1338 | if (m < 0x40000000) { |
1339 | --e; |
1340 | m <<= 1; |
1341 | } |
1342 | } else { |
1343 | uint64_t c_flt_m_64 = (uint64_t) c_flt_m << 32; |
1344 | if (exp_diff < 0) { |
1345 | s = c_flt_s; |
1346 | e = c_flt_e; |
1347 | m_64 = c_flt_m_64 - _mesa_shift_right_jam64(m_64, -exp_diff); |
1348 | } else if (!exp_diff) { |
1349 | m_64 -= c_flt_m_64; |
1350 | if (!m_64) { |
1351 | |
1352 | fi_type result; |
1353 | result.u = (s << 31) + 0; |
1354 | return result.f; |
1355 | } |
1356 | if (m_64 & 0x8000000000000000) { |
1357 | s = !s; |
1358 | m_64 = -m_64; |
1359 | } |
1360 | } else { |
1361 | m_64 -= _mesa_shift_right_jam64(c_flt_m_64, exp_diff); |
1362 | } |
1363 | int8_t shift_dist = _mesa_count_leading_zeros64(m_64) - 1; |
1364 | e -= shift_dist; |
1365 | shift_dist -= 32; |
1366 | if (shift_dist < 0) { |
1367 | m = _mesa_short_shift_right_jam64(m_64, -shift_dist); |
1368 | } else { |
1369 | m = (uint32_t) m_64 << shift_dist; |
1370 | } |
1371 | } |
1372 | |
1373 | return _mesa_round_f32(s, e, m, true); |
1374 | } |
1375 | |
1376 | |
1377 | |
1378 | |
1379 | |
1380 | |
1381 | |
1382 | |
1383 | float |
1384 | _mesa_double_to_f32(double val, bool rtz) |
1385 | { |
1386 | const di_type di = {val}; |
1387 | uint64_t flt_m = di.u & 0x0fffffffffffff; |
1388 | uint64_t flt_e = (di.u >> 52) & 0x7ff; |
1389 | uint64_t flt_s = (di.u >> 63) & 0x1; |
1390 | int32_t s, e, m = 0; |
1391 | |
1392 | s = flt_s; |
1393 | |
1394 | if (flt_e == 0x7ff) { |
1395 | if (flt_m != 0) { |
1396 | |
1397 | fi_type result; |
1398 | e = 0xff; |
1399 | m = 0x1; |
1400 | result.u = (s << 31) + (e << 23) + m; |
1401 | return result.f; |
1402 | } |
1403 | |
1404 | |
1405 | fi_type result; |
1406 | e = 0xff; |
1407 | result.u = (s << 31) + (e << 23) + m; |
1408 | return result.f; |
1409 | } |
1410 | |
1411 | if (!(flt_e | flt_m)) { |
1412 | |
1413 | fi_type result; |
1414 | e = 0; |
1415 | result.u = (s << 31) + (e << 23) + m; |
1416 | return result.f; |
1417 | } |
1418 | |
1419 | m = _mesa_short_shift_right_jam64(flt_m, 22); |
1420 | if ( ! (flt_e | m) ) { |
1421 | |
1422 | fi_type result; |
1423 | e = 0; |
1424 | result.u = (s << 31) + (e << 23) + m; |
1425 | return result.f; |
1426 | } |
1427 | |
1428 | return _mesa_round_f32(s, flt_e - 0x381, m | 0x40000000, rtz); |
1429 | } |
1430 | |
1431 | |
1432 | |
1433 | |
1434 | |
1435 | |
1436 | |
1437 | uint16_t |
1438 | _mesa_float_to_half_rtz(float val) |
1439 | { |
1440 | const fi_type fi = {val}; |
1441 | const uint32_t flt_m = fi.u & 0x7fffff; |
1442 | const uint32_t flt_e = (fi.u >> 23) & 0xff; |
1443 | const uint32_t flt_s = (fi.u >> 31) & 0x1; |
1444 | int16_t s, e, m = 0; |
1445 | |
1446 | s = flt_s; |
1447 | |
1448 | if (flt_e == 0xff) { |
1449 | if (flt_m != 0) { |
1450 | |
1451 | e = 0x1f; |
1452 | m = 0x1; |
1453 | return (s << 15) + (e << 10) + m; |
1454 | } |
1455 | |
1456 | |
1457 | e = 0x1f; |
1458 | return (s << 15) + (e << 10) + m; |
1459 | } |
1460 | |
1461 | if (!(flt_e | flt_m)) { |
1462 | |
1463 | e = 0; |
1464 | return (s << 15) + (e << 10) + m; |
1465 | } |
1466 | |
1467 | m = flt_m >> 9 | ((flt_m & 0x1ff) != 0); |
1468 | if ( ! (flt_e | m) ) { |
1469 | |
1470 | e = 0; |
1471 | return (s << 15) + (e << 10) + m; |
1472 | } |
1473 | |
1474 | return _mesa_roundtozero_f16(s, flt_e - 0x71, m | 0x4000); |
1475 | } |