File: | var/lib/jenkins/workspace/firefox-scan-build/gfx/2d/ssse3-scaler.c |
Warning: | line 293, column 34 Dereference of null pointer |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||
2 | * Copyright © 2013 Soren Sandmann Pedersen | |||
3 | * Copyright © 2013 Red Hat, Inc. | |||
4 | * Copyright © 2016 Mozilla Foundation | |||
5 | * | |||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | |||
7 | * copy of this software and associated documentation files (the "Software"), | |||
8 | * to deal in the Software without restriction, including without limitation | |||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
10 | * and/or sell copies of the Software, and to permit persons to whom the | |||
11 | * Software is furnished to do so, subject to the following conditions: | |||
12 | * | |||
13 | * The above copyright notice and this permission notice (including the next | |||
14 | * paragraph) shall be included in all copies or substantial portions of the | |||
15 | * Software. | |||
16 | * | |||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
22 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |||
23 | * DEALINGS IN THE SOFTWARE. | |||
24 | * | |||
25 | * Author: Soren Sandmann (soren.sandmann@gmail.com) | |||
26 | * Jeff Muizelaar (jmuizelaar@mozilla.com) | |||
27 | */ | |||
28 | ||||
29 | /* This has been adapted from the ssse3 code from pixman. It's currently | |||
30 | * a mess as I want to try it out in practice before finalizing the details. | |||
31 | */ | |||
32 | ||||
33 | #include <stdlib.h> | |||
34 | #include <xmmintrin.h> | |||
35 | #include <emmintrin.h> | |||
36 | #include <tmmintrin.h> | |||
37 | #include <stdint.h> | |||
38 | #include <assert.h> | |||
39 | #include "ssse3-scaler.h" | |||
40 | ||||
/* 16.16 signed fixed-point scalar type and conversion helpers. */
typedef int32_t pixman_fixed_16_16_t;
typedef pixman_fixed_16_16_t pixman_fixed_t;
/* The value 1.0 in 16.16 fixed point. */
#define pixman_fixed_1 (pixman_int_to_fixed(1))
/* Drops the 16 fraction bits of a fixed-point value. */
#define pixman_fixed_to_int(f) ((int)((f) >> 16))
/* Integer -> fixed point. The shift is performed on an unsigned value because
 * left-shifting a negative signed integer is undefined behavior in C. */
#define pixman_int_to_fixed(i) ((pixman_fixed_t)((uint32_t)(i) << 16))
/* Double -> fixed point; the fraction beyond 1/65536 is truncated by the
 * cast. */
#define pixman_double_to_fixed(d) ((pixman_fixed_t)((d) * 65536.0))
/* Largest / smallest integer whose 16.16 representation fits in 32 bits. */
#define PIXMAN_FIXED_INT_MAX 32767
#define PIXMAN_FIXED_INT_MIN (-32768)
49 | typedef struct pixman_vector pixman_vector_t; | |||
50 | ||||
51 | typedef int pixman_bool_t; | |||
52 | typedef int64_t pixman_fixed_32_32_t; | |||
53 | typedef pixman_fixed_32_32_t pixman_fixed_48_16_t; | |||
54 | typedef struct { | |||
55 | pixman_fixed_48_16_t v[3]; | |||
56 | } pixman_vector_48_16_t; | |||
57 | ||||
58 | struct pixman_vector { | |||
59 | pixman_fixed_t vector[3]; | |||
60 | }; | |||
61 | typedef struct pixman_transform pixman_transform_t; | |||
62 | ||||
63 | struct pixman_transform { | |||
64 | pixman_fixed_t matrix[3][3]; | |||
65 | }; | |||
66 | ||||
/* Compiler-specific spelling of "always inline this function". */
#ifdef _MSC_VER
#  define force_inline __forceinline
#else
#  define force_inline __inline__ __attribute__((always_inline))
#endif

/* Number of fraction bits kept for bilinear interpolation weights. */
#define BILINEAR_INTERPOLATION_BITS 6
74 | ||||
75 | static force_inline__inline__ __attribute__((always_inline)) int pixman_fixed_to_bilinear_weight(pixman_fixed_t x) { | |||
76 | return (x >> (16 - BILINEAR_INTERPOLATION_BITS6)) & | |||
77 | ((1 << BILINEAR_INTERPOLATION_BITS6) - 1); | |||
78 | } | |||
79 | ||||
80 | static void pixman_transform_point_31_16_3d(const pixman_transform_t* t, | |||
81 | const pixman_vector_48_16_t* v, | |||
82 | pixman_vector_48_16_t* result) { | |||
83 | int i; | |||
84 | int64_t tmp[3][2]; | |||
85 | ||||
86 | /* input vector values must have no more than 31 bits (including sign) | |||
87 | * in the integer part */ | |||
88 | assert(v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)))((void) sizeof ((v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))) ? 1 : 0), __extension__ ({ if (v->v[0] < ( (pixman_fixed_48_16_t)1 << (30 + 16))) ; else __assert_fail ("v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))" , "/var/lib/jenkins/workspace/firefox-scan-build/gfx/2d/ssse3-scaler.c" , 88, __extension__ __PRETTY_FUNCTION__); })); | |||
89 | assert(v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)))((void) sizeof ((v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))) ? 1 : 0), __extension__ ({ if (v->v[0] >= - ((pixman_fixed_48_16_t)1 << (30 + 16))) ; else __assert_fail ("v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))" , "/var/lib/jenkins/workspace/firefox-scan-build/gfx/2d/ssse3-scaler.c" , 89, __extension__ __PRETTY_FUNCTION__); })); | |||
90 | assert(v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)))((void) sizeof ((v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))) ? 1 : 0), __extension__ ({ if (v->v[1] < ( (pixman_fixed_48_16_t)1 << (30 + 16))) ; else __assert_fail ("v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))" , "/var/lib/jenkins/workspace/firefox-scan-build/gfx/2d/ssse3-scaler.c" , 90, __extension__ __PRETTY_FUNCTION__); })); | |||
91 | assert(v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)))((void) sizeof ((v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))) ? 1 : 0), __extension__ ({ if (v->v[1] >= - ((pixman_fixed_48_16_t)1 << (30 + 16))) ; else __assert_fail ("v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))" , "/var/lib/jenkins/workspace/firefox-scan-build/gfx/2d/ssse3-scaler.c" , 91, __extension__ __PRETTY_FUNCTION__); })); | |||
92 | assert(v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16)))((void) sizeof ((v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))) ? 1 : 0), __extension__ ({ if (v->v[2] < ( (pixman_fixed_48_16_t)1 << (30 + 16))) ; else __assert_fail ("v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))" , "/var/lib/jenkins/workspace/firefox-scan-build/gfx/2d/ssse3-scaler.c" , 92, __extension__ __PRETTY_FUNCTION__); })); | |||
93 | assert(v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)))((void) sizeof ((v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))) ? 1 : 0), __extension__ ({ if (v->v[2] >= - ((pixman_fixed_48_16_t)1 << (30 + 16))) ; else __assert_fail ("v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))" , "/var/lib/jenkins/workspace/firefox-scan-build/gfx/2d/ssse3-scaler.c" , 93, __extension__ __PRETTY_FUNCTION__); })); | |||
94 | ||||
95 | for (i = 0; i < 3; i++) { | |||
96 | tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); | |||
97 | tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); | |||
98 | tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); | |||
99 | tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); | |||
100 | tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); | |||
101 | tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); | |||
102 | } | |||
103 | ||||
104 | result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); | |||
105 | result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); | |||
106 | result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16); | |||
107 | } | |||
108 | ||||
109 | static pixman_bool_t pixman_transform_point_3d( | |||
110 | const struct pixman_transform* transform, struct pixman_vector* vector) { | |||
111 | pixman_vector_48_16_t tmp; | |||
112 | tmp.v[0] = vector->vector[0]; | |||
113 | tmp.v[1] = vector->vector[1]; | |||
114 | tmp.v[2] = vector->vector[2]; | |||
115 | ||||
116 | pixman_transform_point_31_16_3d(transform, &tmp, &tmp); | |||
117 | ||||
118 | vector->vector[0] = tmp.v[0]; | |||
119 | vector->vector[1] = tmp.v[1]; | |||
120 | vector->vector[2] = tmp.v[2]; | |||
121 | ||||
122 | return vector->vector[0] == tmp.v[0] && vector->vector[1] == tmp.v[1] && | |||
123 | vector->vector[2] == tmp.v[2]; | |||
124 | } | |||
125 | ||||
126 | struct bits_image_t { | |||
127 | uint32_t* bits; | |||
128 | int rowstride; | |||
129 | pixman_transform_t* transform; | |||
130 | }; | |||
131 | ||||
132 | typedef struct bits_image_t bits_image_t; | |||
133 | typedef struct { | |||
134 | int unused; | |||
135 | } pixman_iter_info_t; | |||
136 | ||||
137 | typedef struct pixman_iter_t pixman_iter_t; | |||
138 | typedef void (*pixman_iter_fini_t)(pixman_iter_t* iter); | |||
139 | ||||
140 | struct pixman_iter_t { | |||
141 | int x, y; | |||
142 | pixman_iter_fini_t fini; | |||
143 | bits_image_t* image; | |||
144 | uint32_t* buffer; | |||
145 | int width; | |||
146 | int height; | |||
147 | void* data; | |||
148 | }; | |||
149 | ||||
150 | typedef struct { | |||
151 | int y; | |||
152 | uint64_t* buffer; | |||
153 | } line_t; | |||
154 | ||||
155 | typedef struct { | |||
156 | line_t lines[2]; | |||
157 | pixman_fixed_t y; | |||
158 | pixman_fixed_t x; | |||
159 | uint64_t data[1]; | |||
160 | } bilinear_info_t; | |||
161 | ||||
162 | static void ssse3_fetch_horizontal(bits_image_t* image, line_t* line, int y, | |||
163 | pixman_fixed_t x, pixman_fixed_t ux, int n) { | |||
164 | uint32_t* bits = image->bits + y * image->rowstride; | |||
165 | __m128i vx = _mm_set_epi16(-(x + 1), x, -(x + 1), x, -(x + ux + 1), x + ux, | |||
166 | -(x + ux + 1), x + ux); | |||
167 | __m128i vux = _mm_set_epi16(-2 * ux, 2 * ux, -2 * ux, 2 * ux, -2 * ux, 2 * ux, | |||
168 | -2 * ux, 2 * ux); | |||
169 | __m128i vaddc = _mm_set_epi16(1, 0, 1, 0, 1, 0, 1, 0); | |||
170 | __m128i* b = (__m128i*)line->buffer; | |||
171 | __m128i vrl0, vrl1; | |||
172 | ||||
173 | while ((n -= 2) >= 0) { | |||
174 | __m128i vw, vr, s; | |||
175 | #ifdef HACKY_PADDING | |||
176 | if (pixman_fixed_to_int(x + ux)((int)((x + ux) >> 16)) >= image->rowstride) { | |||
177 | vrl1 = _mm_setzero_si128(); | |||
178 | printf("overread 2loop\n"); | |||
179 | } else { | |||
180 | if (pixman_fixed_to_int(x + ux)((int)((x + ux) >> 16)) < 0) printf("underflow\n"); | |||
181 | vrl1 = _mm_loadl_epi64( | |||
182 | (__m128i*)(bits + (pixman_fixed_to_int(x + ux)((int)((x + ux) >> 16)) < 0 | |||
183 | ? 0 | |||
184 | : pixman_fixed_to_int(x + ux)((int)((x + ux) >> 16))))); | |||
185 | } | |||
186 | #else | |||
187 | vrl1 = _mm_loadl_epi64((__m128i*)(bits + pixman_fixed_to_int(x + ux)((int)((x + ux) >> 16)))); | |||
188 | #endif | |||
189 | /* vrl1: R1, L1 */ | |||
190 | ||||
191 | final_pixel: | |||
192 | #ifdef HACKY_PADDING | |||
193 | vrl0 = _mm_loadl_epi64( | |||
194 | (__m128i*)(bits + | |||
195 | (pixman_fixed_to_int(x)((int)((x) >> 16)) < 0 ? 0 : pixman_fixed_to_int(x)((int)((x) >> 16))))); | |||
196 | #else | |||
197 | vrl0 = _mm_loadl_epi64((__m128i*)(bits + pixman_fixed_to_int(x)((int)((x) >> 16)))); | |||
198 | #endif | |||
199 | /* vrl0: R0, L0 */ | |||
200 | ||||
201 | /* The weights are based on vx which is a vector of | |||
202 | * | |||
203 | * - (x + 1), x, - (x + 1), x, | |||
204 | * - (x + ux + 1), x + ux, - (x + ux + 1), x + ux | |||
205 | * | |||
206 | * so the 16 bit weights end up like this: | |||
207 | * | |||
208 | * iw0, w0, iw0, w0, iw1, w1, iw1, w1 | |||
209 | * | |||
210 | * and after shifting and packing, we get these bytes: | |||
211 | * | |||
212 | * iw0, w0, iw0, w0, iw1, w1, iw1, w1, | |||
213 | * iw0, w0, iw0, w0, iw1, w1, iw1, w1, | |||
214 | * | |||
215 | * which means the first and the second input pixel | |||
216 | * have to be interleaved like this: | |||
217 | * | |||
218 | * la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, | |||
219 | * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 | |||
220 | * | |||
221 | * before maddubsw can be used. | |||
222 | */ | |||
223 | ||||
224 | vw = _mm_add_epi16(vaddc, | |||
225 | _mm_srli_epi16(vx, 16 - BILINEAR_INTERPOLATION_BITS6)); | |||
226 | /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1 | |||
227 | */ | |||
228 | ||||
229 | vw = _mm_packus_epi16(vw, vw); | |||
230 | /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1, | |||
231 | * iw0, w0, iw0, w0, iw1, w1, iw1, w1 | |||
232 | */ | |||
233 | vx = _mm_add_epi16(vx, vux); | |||
234 | ||||
235 | x += 2 * ux; | |||
236 | ||||
237 | vr = _mm_unpacklo_epi16(vrl1, vrl0); | |||
238 | /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */ | |||
239 | ||||
240 | s = _mm_shuffle_epi32(vr, _MM_SHUFFLE(1, 0, 3, 2))((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(vr), (int)( (((1) << 6) | ((0) << 4) | ((3) << 2) | (2) )))); | |||
241 | /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */ | |||
242 | ||||
243 | vr = _mm_unpackhi_epi8(vr, s); | |||
244 | /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, | |||
245 | * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 | |||
246 | */ | |||
247 | ||||
248 | vr = _mm_maddubs_epi16(vr, vw); | |||
249 | ||||
250 | /* When the weight is 0, the inverse weight is | |||
251 | * 128 which can't be represented in a signed byte. | |||
252 | * As a result maddubsw computes the following: | |||
253 | * | |||
254 | * r = l * -128 + r * 0 | |||
255 | * | |||
256 | * rather than the desired | |||
257 | * | |||
258 | * r = l * 128 + r * 0 | |||
259 | * | |||
260 | * We fix this by taking the absolute value of the | |||
261 | * result. | |||
262 | */ | |||
263 | // we can drop this if we use lower precision | |||
264 | ||||
265 | vr = _mm_shuffle_epi32(vr, _MM_SHUFFLE(2, 0, 3, 1))((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(vr), (int)( (((2) << 6) | ((0) << 4) | ((3) << 2) | (1) )))); | |||
266 | /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */ | |||
267 | _mm_store_si128(b++, vr); | |||
268 | } | |||
269 | ||||
270 | if (n == -1) { | |||
271 | vrl1 = _mm_setzero_si128(); | |||
272 | goto final_pixel; | |||
273 | } | |||
274 | ||||
275 | line->y = y; | |||
276 | } | |||
277 | ||||
278 | // scale a line of destination pixels | |||
279 | static uint32_t* ssse3_fetch_bilinear_cover(pixman_iter_t* iter, | |||
280 | const uint32_t* mask) { | |||
281 | pixman_fixed_t fx, ux; | |||
282 | bilinear_info_t* info = iter->data; | |||
283 | line_t *line0, *line1; | |||
284 | int y0, y1; | |||
285 | int32_t dist_y; | |||
286 | __m128i vw, uvw; | |||
287 | int i; | |||
288 | ||||
289 | fx = info->x; | |||
290 | ux = iter->image->transform->matrix[0][0]; | |||
291 | ||||
292 | y0 = pixman_fixed_to_int(info->y)((int)((info->y) >> 16)); | |||
293 | if (y0 < 0) *(volatile char*)0 = 9; | |||
| ||||
294 | y1 = y0 + 1; | |||
295 | ||||
296 | // clamping in y direction | |||
297 | if (y1 >= iter->height) { | |||
298 | y1 = iter->height - 1; | |||
299 | } | |||
300 | ||||
301 | line0 = &info->lines[y0 & 0x01]; | |||
302 | line1 = &info->lines[y1 & 0x01]; | |||
303 | ||||
304 | if (line0->y != y0) { | |||
305 | ssse3_fetch_horizontal(iter->image, line0, y0, fx, ux, iter->width); | |||
306 | } | |||
307 | ||||
308 | if (line1->y != y1) { | |||
309 | ssse3_fetch_horizontal(iter->image, line1, y1, fx, ux, iter->width); | |||
310 | } | |||
311 | ||||
312 | #ifdef PIXMAN_STYLE_INTERPOLATION | |||
313 | dist_y = pixman_fixed_to_bilinear_weight(info->y); | |||
314 | dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS6); | |||
315 | ||||
316 | vw = _mm_set_epi16(dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, | |||
317 | dist_y); | |||
318 | ||||
319 | #else | |||
320 | // setup the weights for the top (vw) and bottom (uvw) lines | |||
321 | dist_y = pixman_fixed_to_bilinear_weight(info->y); | |||
322 | // we use 15 instead of 16 because we need an extra bit to handle when the | |||
323 | // weights are 0 and 1 | |||
324 | dist_y <<= (15 - BILINEAR_INTERPOLATION_BITS6); | |||
325 | ||||
326 | vw = _mm_set_epi16(dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, | |||
327 | dist_y); | |||
328 | ||||
329 | dist_y = (1 << BILINEAR_INTERPOLATION_BITS6) - | |||
330 | pixman_fixed_to_bilinear_weight(info->y); | |||
331 | dist_y <<= (15 - BILINEAR_INTERPOLATION_BITS6); | |||
332 | uvw = _mm_set_epi16(dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, | |||
333 | dist_y); | |||
334 | #endif | |||
335 | ||||
336 | for (i = 0; i + 3 < iter->width; i += 4) { | |||
337 | __m128i top0 = _mm_load_si128((__m128i*)(line0->buffer + i)); | |||
338 | __m128i bot0 = _mm_load_si128((__m128i*)(line1->buffer + i)); | |||
339 | __m128i top1 = _mm_load_si128((__m128i*)(line0->buffer + i + 2)); | |||
340 | __m128i bot1 = _mm_load_si128((__m128i*)(line1->buffer + i + 2)); | |||
341 | #ifdef PIXMAN_STYLE_INTERPOLATION | |||
342 | __m128i r0, r1, tmp, p; | |||
343 | ||||
344 | r0 = _mm_mulhi_epu16(_mm_sub_epi16(bot0, top0), vw); | |||
345 | tmp = _mm_cmplt_epi16(bot0, top0); | |||
346 | tmp = _mm_and_si128(tmp, vw); | |||
347 | r0 = _mm_sub_epi16(r0, tmp); | |||
348 | r0 = _mm_add_epi16(r0, top0); | |||
349 | r0 = _mm_srli_epi16(r0, BILINEAR_INTERPOLATION_BITS6); | |||
350 | /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */ | |||
351 | // r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1)); | |||
352 | /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */ | |||
353 | ||||
354 | // tmp = bot1 < top1 ? vw : 0; | |||
355 | // r1 = (bot1 - top1)*vw + top1 - tmp | |||
356 | // r1 = bot1*vw - vw*top1 + top1 - tmp | |||
357 | // r1 = bot1*vw + top1 - vw*top1 - tmp | |||
358 | // r1 = bot1*vw + top1*(1 - vw) - tmp | |||
359 | r1 = _mm_mulhi_epu16(_mm_sub_epi16(bot1, top1), vw); | |||
360 | tmp = _mm_cmplt_epi16(bot1, top1); | |||
361 | tmp = _mm_and_si128(tmp, vw); | |||
362 | r1 = _mm_sub_epi16(r1, tmp); | |||
363 | r1 = _mm_add_epi16(r1, top1); | |||
364 | r1 = _mm_srli_epi16(r1, BILINEAR_INTERPOLATION_BITS6); | |||
365 | // r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1)); | |||
366 | /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */ | |||
367 | #else | |||
368 | __m128i r0, r1, p; | |||
369 | top0 = _mm_mulhi_epu16(top0, uvw); | |||
370 | bot0 = _mm_mulhi_epu16(bot0, vw); | |||
371 | r0 = _mm_add_epi16(top0, bot0); | |||
372 | r0 = _mm_srli_epi16(r0, BILINEAR_INTERPOLATION_BITS6 - 1); | |||
373 | ||||
374 | top1 = _mm_mulhi_epu16(top1, uvw); | |||
375 | bot1 = _mm_mulhi_epu16(bot1, vw); | |||
376 | r1 = _mm_add_epi16(top1, bot1); | |||
377 | r1 = _mm_srli_epi16(r1, BILINEAR_INTERPOLATION_BITS6 - 1); | |||
378 | #endif | |||
379 | ||||
380 | p = _mm_packus_epi16(r0, r1); | |||
381 | _mm_storeu_si128((__m128i*)(iter->buffer + i), p); | |||
382 | } | |||
383 | ||||
384 | while (i < iter->width) { | |||
385 | __m128i top0 = _mm_load_si128((__m128i*)(line0->buffer + i)); | |||
386 | __m128i bot0 = _mm_load_si128((__m128i*)(line1->buffer + i)); | |||
387 | ||||
388 | #ifdef PIXMAN_STYLE_INTERPOLATION | |||
389 | __m128i r0, tmp, p; | |||
390 | r0 = _mm_mulhi_epu16(_mm_sub_epi16(bot0, top0), vw); | |||
391 | tmp = _mm_cmplt_epi16(bot0, top0); | |||
392 | tmp = _mm_and_si128(tmp, vw); | |||
393 | r0 = _mm_sub_epi16(r0, tmp); | |||
394 | r0 = _mm_add_epi16(r0, top0); | |||
395 | r0 = _mm_srli_epi16(r0, BILINEAR_INTERPOLATION_BITS6); | |||
396 | /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */ | |||
397 | r0 = _mm_shuffle_epi32(r0, _MM_SHUFFLE(2, 0, 3, 1))((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(r0), (int)( (((2) << 6) | ((0) << 4) | ((3) << 2) | (1) )))); | |||
398 | /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */ | |||
399 | #else | |||
400 | __m128i r0, p; | |||
401 | top0 = _mm_mulhi_epu16(top0, uvw); | |||
402 | bot0 = _mm_mulhi_epu16(bot0, vw); | |||
403 | r0 = _mm_add_epi16(top0, bot0); | |||
404 | r0 = _mm_srli_epi16(r0, BILINEAR_INTERPOLATION_BITS6 - 1); | |||
405 | #endif | |||
406 | ||||
407 | p = _mm_packus_epi16(r0, r0); | |||
408 | ||||
409 | if (iter->width - i == 1) { | |||
410 | *(uint32_t*)(iter->buffer + i) = _mm_cvtsi128_si32(p); | |||
411 | i++; | |||
412 | } else { | |||
413 | _mm_storel_epi64((__m128i*)(iter->buffer + i), p); | |||
414 | i += 2; | |||
415 | } | |||
416 | } | |||
417 | ||||
418 | info->y += iter->image->transform->matrix[1][1]; | |||
419 | ||||
420 | return iter->buffer; | |||
421 | } | |||
422 | ||||
423 | static void ssse3_bilinear_cover_iter_fini(pixman_iter_t* iter) { | |||
424 | free(iter->data); | |||
425 | } | |||
426 | ||||
427 | static void ssse3_bilinear_cover_iter_init(pixman_iter_t* iter) { | |||
428 | int width = iter->width; | |||
429 | bilinear_info_t* info; | |||
430 | pixman_vector_t v; | |||
431 | ||||
432 | if (iter->x > PIXMAN_FIXED_INT_MAX32767 || iter->x < PIXMAN_FIXED_INT_MIN-32768 || | |||
433 | iter->y > PIXMAN_FIXED_INT_MAX32767 || iter->y < PIXMAN_FIXED_INT_MIN-32768) | |||
434 | goto fail; | |||
435 | ||||
436 | /* Reference point is the center of the pixel */ | |||
437 | v.vector[0] = pixman_int_to_fixed(iter->x)((pixman_fixed_t)((iter->x) << 16)) + pixman_fixed_1(((pixman_fixed_t)((1) << 16))) / 2; | |||
438 | v.vector[1] = pixman_int_to_fixed(iter->y)((pixman_fixed_t)((iter->y) << 16)) + pixman_fixed_1(((pixman_fixed_t)((1) << 16))) / 2; | |||
439 | v.vector[2] = pixman_fixed_1(((pixman_fixed_t)((1) << 16))); | |||
440 | ||||
441 | if (!pixman_transform_point_3d(iter->image->transform, &v)) goto fail; | |||
442 | ||||
443 | info = malloc(sizeof(*info) + (2 * width - 1) * sizeof(uint64_t) + 64); | |||
444 | if (!info) goto fail; | |||
445 | ||||
446 | info->x = v.vector[0] - pixman_fixed_1(((pixman_fixed_t)((1) << 16))) / 2; | |||
447 | info->y = v.vector[1] - pixman_fixed_1(((pixman_fixed_t)((1) << 16))) / 2; | |||
448 | ||||
449 | #define ALIGN(addr)((void*)((((uintptr_t)(addr)) + 15) & (~15))) ((void*)((((uintptr_t)(addr)) + 15) & (~15))) | |||
450 | ||||
451 | /* It is safe to set the y coordinates to -1 initially | |||
452 | * because COVER_CLIP_BILINEAR ensures that we will only | |||
453 | * be asked to fetch lines in the [0, height) interval | |||
454 | */ | |||
455 | info->lines[0].y = -1; | |||
456 | info->lines[0].buffer = ALIGN(&(info->data[0]))((void*)((((uintptr_t)(&(info->data[0]))) + 15) & ( ~15))); | |||
457 | info->lines[1].y = -1; | |||
458 | info->lines[1].buffer = ALIGN(info->lines[0].buffer + width)((void*)((((uintptr_t)(info->lines[0].buffer + width)) + 15 ) & (~15))); | |||
459 | ||||
460 | iter->fini = ssse3_bilinear_cover_iter_fini; | |||
461 | ||||
462 | iter->data = info; | |||
463 | return; | |||
464 | ||||
465 | fail: | |||
466 | /* Something went wrong, either a bad matrix or OOM; in such cases, | |||
467 | * we don't guarantee any particular rendering. | |||
468 | */ | |||
469 | iter->fini = NULL((void*)0); | |||
470 | } | |||
471 | ||||
472 | /* scale the src from src_width/height to dest_width/height drawn | |||
473 | * into the rectangle x,y width,height | |||
474 | * src_stride and dst_stride are 4 byte units */ | |||
475 | bool_Bool ssse3_scale_data(uint32_t* src, int src_width, int src_height, | |||
476 | int src_stride, uint32_t* dest, int dest_width, | |||
477 | int dest_height, int dest_stride, int x, int y, int width, | |||
478 | int height) { | |||
479 | // XXX: assert(src_width > 1) | |||
480 | pixman_transform_t transform = { | |||
481 | {{pixman_fixed_1(((pixman_fixed_t)((1) << 16))), 0, 0}, {0, pixman_fixed_1(((pixman_fixed_t)((1) << 16))), 0}, {0, 0, pixman_fixed_1(((pixman_fixed_t)((1) << 16)))}}}; | |||
482 | double width_scale = ((double)src_width) / dest_width; | |||
483 | double height_scale = ((double)src_height) / dest_height; | |||
484 | #define AVOID_PADDING | |||
485 | #ifdef AVOID_PADDING | |||
486 | // scale up by enough that we don't read outside of the bounds of the source | |||
487 | // surface currently this is required to avoid reading out of bounds. | |||
488 | if (width_scale < 1) { | |||
| ||||
489 | width_scale = (double)(src_width - 1) / dest_width; | |||
490 | transform.matrix[0][2] = pixman_fixed_1(((pixman_fixed_t)((1) << 16))) / 2; | |||
491 | } | |||
492 | if (height_scale < 1) { | |||
493 | height_scale = (double)(src_height - 1) / dest_height; | |||
494 | transform.matrix[1][2] = pixman_fixed_1(((pixman_fixed_t)((1) << 16))) / 2; | |||
495 | } | |||
496 | #endif | |||
497 | transform.matrix[0][0] = pixman_double_to_fixed(width_scale)((pixman_fixed_t)((width_scale) * 65536.0)); | |||
498 | transform.matrix[1][1] = pixman_double_to_fixed(height_scale)((pixman_fixed_t)((height_scale) * 65536.0)); | |||
499 | transform.matrix[2][2] = pixman_fixed_1(((pixman_fixed_t)((1) << 16))); | |||
500 | ||||
501 | bits_image_t image; | |||
502 | image.bits = src; | |||
503 | image.transform = &transform; | |||
504 | image.rowstride = src_stride; | |||
505 | ||||
506 | pixman_iter_t iter; | |||
507 | iter.image = ℑ | |||
508 | iter.x = x; | |||
509 | iter.y = y; | |||
510 | iter.width = width; | |||
511 | iter.height = src_height; | |||
512 | iter.buffer = dest; | |||
513 | iter.data = NULL((void*)0); | |||
514 | ||||
515 | ssse3_bilinear_cover_iter_init(&iter); | |||
516 | ||||
517 | if (!iter.fini
| |||
518 | ||||
519 | if (iter.data
| |||
520 | for (int iy = 0; iy < height; iy++) { | |||
521 | ssse3_fetch_bilinear_cover(&iter, NULL((void*)0)); | |||
522 | iter.buffer += dest_stride; | |||
523 | } | |||
524 | ssse3_bilinear_cover_iter_fini(&iter); | |||
525 | } | |||
526 | return true1; | |||
527 | } |