/*
 * Clang static analyzer report context:
 * usr/lib/llvm-19/lib/clang/19/include/emmintrin.h, line 3444, column 10:
 * "Access to field '__v' results in a dereference of a null pointer
 * (loaded from variable '__p')"
 */
/*
 * Copyright © 2008 Rodrigo Kumpera
 * Copyright © 2008 André Tupinambá
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Red Hat not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission.  Red Hat makes no representations about the
 * suitability of this software for any purpose.  It is provided "as is"
 * without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author:  Rodrigo Kumpera (kumpera@gmail.com)
 *          André Tupinambá (andrelrt@gmail.com)
 *
 * Based on work by Owen Taylor and Søren Sandmann
 */
#ifdef HAVE_CONFIG_H
#include <pixman-config.h>
#endif

/* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */
#define PSHUFD_IS_FAST 0

#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
#include <emmintrin.h> /* for SSE2 intrinsics */
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"

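/* Constant channel masks used throughout this file.  They are presumably
 * set up once when the SSE2 implementation is created (the initialization
 * code is not part of this listing), so every helper below can assume
 * they are already filled in.
 */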
static __m128i mask_0080;
static __m128i mask_00ff;
static __m128i mask_0101;
static __m128i mask_ffff;
static __m128i mask_ff000000;
static __m128i mask_alpha;

static __m128i mask_565_r;
static __m128i mask_565_g1, mask_565_g2;
static __m128i mask_565_b;
static __m128i mask_red;
static __m128i mask_green;
static __m128i mask_blue;

static __m128i mask_565_fix_rb;
static __m128i mask_565_fix_g;

static __m128i mask_565_rb;
static __m128i mask_565_pack_multiplier;

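/* An "unpacked" pixel holds one 8-bit channel per 16-bit lane
 * (zero-extended), so the product of two channels fits without overflow.
 * unpack_32_1x128 widens one a8r8g8b8 pixel into the low eight lanes;
 * unpack_128_2x128 widens four packed pixels into two such registers.
 */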
static force_inline __m128i
unpack_32_1x128 (uint32_t data)
{
    return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
}

static force_inline void
unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi)
{
    *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
    *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
}

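/* Expand r5g6b5 to x8r8g8b8: shift each field into its 8-bit slot, then
 * replicate its top bits into the freed low bits so the full 0..255
 * range is reached (e.g. 5-bit 0x1f becomes 0xff rather than 0xf8).
 */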
static force_inline __m128i
unpack_565_to_8888 (__m128i lo)
{
    __m128i r, g, b, rb, t;

    r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
    g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
    b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);

    rb = _mm_or_si128 (r, b);
    t  = _mm_and_si128 (rb, mask_565_fix_rb);
    t  = _mm_srli_epi32 (t, 5);
    rb = _mm_or_si128 (rb, t);

    t  = _mm_and_si128 (g, mask_565_fix_g);
    t  = _mm_srli_epi32 (t, 6);
    g  = _mm_or_si128 (g, t);

    return _mm_or_si128 (rb, g);
}

static force_inline void
unpack_565_128_4x128 (__m128i  data,
                      __m128i* data0,
                      __m128i* data1,
                      __m128i* data2,
                      __m128i* data3)
{
    __m128i lo, hi;

    lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
    hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());

    lo = unpack_565_to_8888 (lo);
    hi = unpack_565_to_8888 (hi);

    unpack_128_2x128 (lo, data0, data1);
    unpack_128_2x128 (hi, data2, data3);
}

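/* Pack one 8888 pixel to 565 by keeping the top 5/6/5 bits of each
 * channel: bits 23-19 become r, 15-10 become g, 7-3 become b.
 */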
static force_inline uint16_t
pack_565_32_16 (uint32_t pixel)
{
    return (uint16_t) (((pixel >> 8) & 0xf800) |
                       ((pixel >> 5) & 0x07e0) |
                       ((pixel >> 3) & 0x001f));
}

static force_inline __m128i
pack_2x128_128 (__m128i lo, __m128i hi)
{
    return _mm_packus_epi16 (lo, hi);
}

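/* Pack eight unpacked pixels to 565.  PMADDWD moves r and b into their
 * 565 positions in a single multiply-add; since SSE2 has no
 * _mm_packus_epi32, the final 32-to-16 packing is simulated with a
 * shift/arithmetic-shift pair followed by signed saturation.
 */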
static force_inline __m128i
pack_565_2packedx128_128 (__m128i lo, __m128i hi)
{
    __m128i rb0 = _mm_and_si128 (lo, mask_565_rb);
    __m128i rb1 = _mm_and_si128 (hi, mask_565_rb);

    __m128i t0 = _mm_madd_epi16 (rb0, mask_565_pack_multiplier);
    __m128i t1 = _mm_madd_epi16 (rb1, mask_565_pack_multiplier);

    __m128i g0 = _mm_and_si128 (lo, mask_green);
    __m128i g1 = _mm_and_si128 (hi, mask_green);

    t0 = _mm_or_si128 (t0, g0);
    t1 = _mm_or_si128 (t1, g1);

    /* Simulates _mm_packus_epi32 */
    t0 = _mm_slli_epi32 (t0, 16 - 5);
    t1 = _mm_slli_epi32 (t1, 16 - 5);
    t0 = _mm_srai_epi32 (t0, 16);
    t1 = _mm_srai_epi32 (t1, 16);
    return _mm_packs_epi32 (t0, t1);
}

static force_inline __m128i
pack_565_2x128_128 (__m128i lo, __m128i hi)
{
    __m128i data;
    __m128i r, g1, g2, b;

    data = pack_2x128_128 (lo, hi);

    r  = _mm_and_si128 (data, mask_565_r);
    g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
    g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
    b  = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);

    return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
}

static force_inline __m128i
pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
{
    return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
                             pack_565_2x128_128 (*xmm2, *xmm3));
}

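/* PMOVMSKB collects the top bit of each byte.  Testing the result
 * against 0x8888 looks only at byte 3 of every 32-bit pixel, i.e. the
 * alpha bytes of four packed a8r8g8b8 pixels, so is_opaque and
 * is_transparent check all four alphas at once; is_zero tests the
 * whole register.
 */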
static force_inline int
is_opaque (__m128i x)
{
    __m128i ffs = _mm_cmpeq_epi8 (x, x);

    return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
}

static force_inline int
is_zero (__m128i x)
{
    return _mm_movemask_epi8 (
        _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
}

static force_inline int
is_transparent (__m128i x)
{
    return (_mm_movemask_epi8 (
                _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
}

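/* Duplicate one unpacked pixel into both 64-bit halves of the register,
 * so the 2x128 helpers can consume a solid color.
 */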
static force_inline __m128i
expand_pixel_32_1x128 (uint32_t data)
{
    return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0));
}

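/* Broadcast the alpha word (lane 3 of each pixel's half) across all
 * four channel words of that pixel.
 */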
static force_inline __m128i
expand_alpha_1x128 (__m128i data)
{
    return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,
                                                     _MM_SHUFFLE (3, 3, 3, 3)),
                                _MM_SHUFFLE (3, 3, 3, 3));
}

static force_inline void
expand_alpha_2x128 (__m128i  data_lo,
                    __m128i  data_hi,
                    __m128i* alpha_lo,
                    __m128i* alpha_hi)
{
    __m128i lo, hi;

    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3));
    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3));

    *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3));
    *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3));
}

static force_inline void
expand_alpha_rev_2x128 (__m128i  data_lo,
                        __m128i  data_hi,
                        __m128i* alpha_lo,
                        __m128i* alpha_hi)
{
    __m128i lo, hi;

    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0));
    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0));
    *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0));
    *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0));
}

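/* Per-channel multiply with exact rounded division by 255:
 * t = x * a + 0x80, then (t + (t >> 8)) >> 8, which PMULHUW against
 * 0x0101 computes in one instruction, since t * 0x0101 = (t << 8) + t.
 */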
static force_inline void
pix_multiply_2x128 (__m128i* data_lo,
                    __m128i* data_hi,
                    __m128i* alpha_lo,
                    __m128i* alpha_hi,
                    __m128i* ret_lo,
                    __m128i* ret_hi)
{
    __m128i lo, hi;

    lo = _mm_mullo_epi16 (*data_lo, *alpha_lo);
    hi = _mm_mullo_epi16 (*data_hi, *alpha_hi);
    lo = _mm_adds_epu16 (lo, mask_0080);
    hi = _mm_adds_epu16 (hi, mask_0080);
    *ret_lo = _mm_mulhi_epu16 (lo, mask_0101);
    *ret_hi = _mm_mulhi_epu16 (hi, mask_0101);
}

static force_inline void
pix_add_multiply_2x128 (__m128i* src_lo,
                        __m128i* src_hi,
                        __m128i* alpha_dst_lo,
                        __m128i* alpha_dst_hi,
                        __m128i* dst_lo,
                        __m128i* dst_hi,
                        __m128i* alpha_src_lo,
                        __m128i* alpha_src_hi,
                        __m128i* ret_lo,
                        __m128i* ret_hi)
{
    __m128i t1_lo, t1_hi;
    __m128i t2_lo, t2_hi;

    pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi);
    pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi);

    *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo);
    *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi);
}

static force_inline void
negate_2x128 (__m128i  data_lo,
              __m128i  data_hi,
              __m128i* neg_lo,
              __m128i* neg_hi)
{
    *neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
    *neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
}

static force_inline void
invert_colors_2x128 (__m128i  data_lo,
                     __m128i  data_hi,
                     __m128i* inv_lo,
                     __m128i* inv_hi)
{
    __m128i lo, hi;

    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2));
    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2));
    *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2));
    *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2));
}

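/* Porter-Duff OVER on premultiplied pixels:
 * dst = src + (1 - alpha) * dst, with the multiply rounded as above.
 */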
static force_inline void
over_2x128 (__m128i* src_lo,
            __m128i* src_hi,
            __m128i* alpha_lo,
            __m128i* alpha_hi,
            __m128i* dst_lo,
            __m128i* dst_hi)
{
    __m128i t1, t2;

    negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);

    pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);

    *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo);
    *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi);
}

static force_inline void
over_rev_non_pre_2x128 (__m128i  src_lo,
                        __m128i  src_hi,
                        __m128i* dst_lo,
                        __m128i* dst_hi)
{
    __m128i lo, hi;
    __m128i alpha_lo, alpha_hi;

    expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi);

    lo = _mm_or_si128 (alpha_lo, mask_alpha);
    hi = _mm_or_si128 (alpha_hi, mask_alpha);

    invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi);

    pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi);

    over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi);
}

static force_inline void
in_over_2x128 (__m128i* src_lo,
               __m128i* src_hi,
               __m128i* alpha_lo,
               __m128i* alpha_hi,
               __m128i* mask_lo,
               __m128i* mask_hi,
               __m128i* dst_lo,
               __m128i* dst_hi)
{
    __m128i s_lo, s_hi;
    __m128i a_lo, a_hi;

    pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
    pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);

    over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
}

/* load 4 pixels from a 16-byte boundary aligned address */
static force_inline __m128i
load_128_aligned (__m128i* src)
{
    return _mm_load_si128 (src);
}

/* load 4 pixels from an unaligned address */
static force_inline __m128i
load_128_unaligned (const __m128i* src)
{
    return _mm_loadu_si128 (src);
}

/* save 4 pixels on a 16-byte boundary aligned address */
static force_inline void
save_128_aligned (__m128i* dst,
                  __m128i  data)
{
    _mm_store_si128 (dst, data);
}

static force_inline __m128i
load_32_1x128 (uint32_t data)
{
    return _mm_cvtsi32_si128 (data);
}

static force_inline __m128i
expand_alpha_rev_1x128 (__m128i data)
{
    return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
}

static force_inline __m128i
expand_pixel_8_1x128 (uint8_t data)
{
    return _mm_shufflelo_epi16 (
        unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
}

static force_inline __m128i
pix_multiply_1x128 (__m128i data,
                    __m128i alpha)
{
    return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha),
                                            mask_0080),
                            mask_0101);
}

static force_inline __m128i
pix_add_multiply_1x128 (__m128i* src,
                        __m128i* alpha_dst,
                        __m128i* dst,
                        __m128i* alpha_src)
{
    __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst);
    __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src);

    return _mm_adds_epu8 (t1, t2);
}

static force_inline __m128i
negate_1x128 (__m128i data)
{
    return _mm_xor_si128 (data, mask_00ff);
}

static force_inline __m128i
invert_colors_1x128 (__m128i data)
{
    return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
}

static force_inline __m128i
over_1x128 (__m128i src, __m128i alpha, __m128i dst)
{
    return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
}

static force_inline __m128i
in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
{
    return over_1x128 (pix_multiply_1x128 (*src, *mask),
                       pix_multiply_1x128 (*alpha, *mask),
                       *dst);
}

static force_inline __m128i
over_rev_non_pre_1x128 (__m128i src, __m128i dst)
{
    __m128i alpha = expand_alpha_1x128 (src);

    return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src),
                                           _mm_or_si128 (alpha, mask_alpha)),
                       alpha,
                       dst);
}

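/* Repack a single unpacked pixel to a8r8g8b8 with unsigned saturation
 * and return its low 32 bits.
 */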
static force_inline uint32_t
pack_1x128_32 (__m128i data)
{
    return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
}

static force_inline __m128i
expand565_16_1x128 (uint16_t pixel)
{
    __m128i m = _mm_cvtsi32_si128 (pixel);

    m = unpack_565_to_8888 (m);

    return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
}

static force_inline uint32_t
core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
{
    uint8_t a;
    __m128i xmms;

    a = src >> 24;

    if (a == 0xff)
    {
        return src;
    }
    else if (src)
    {
        xmms = unpack_32_1x128 (src);
        return pack_1x128_32 (
            over_1x128 (xmms, expand_alpha_1x128 (xmms),
                        unpack_32_1x128 (dst)));
    }

    return dst;
}

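/* Read one source pixel, applying the optional mask: when pm is
 * non-NULL the source is multiplied by the mask's alpha; pm == NULL
 * means "no mask".  The memcpy is a strict-aliasing-safe way to do a
 * possibly unaligned load.
 */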
static force_inline uint32_t
combine1 (const uint32_t *ps, const uint32_t *pm)
{
    uint32_t s;
    memcpy (&s, ps, sizeof (uint32_t));

    if (pm)
    {
        __m128i ms, mm;

        mm = unpack_32_1x128 (*pm);
        mm = expand_alpha_1x128 (mm);

        ms = unpack_32_1x128 (s);
        ms = pix_multiply_1x128 (ms, mm);

        s = pack_1x128_32 (ms);
    }

    return s;
}

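/* Four-pixel version of combine1, with an early out when the mask says
 * all four pixels are fully transparent.
 */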
static force_inline __m128i
combine4 (const __m128i *ps, const __m128i *pm)
{
    __m128i xmm_src_lo, xmm_src_hi;
    __m128i xmm_msk_lo, xmm_msk_hi;
    __m128i s;

    if (pm)
    {
        xmm_msk_lo = load_128_unaligned (pm);

        if (is_transparent (xmm_msk_lo))
            return _mm_setzero_si128 ();
    }

    s = load_128_unaligned (ps);

    if (pm)
    {
        unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
        unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);

        expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);

        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
                            &xmm_msk_lo, &xmm_msk_hi,
                            &xmm_src_lo, &xmm_src_hi);

        s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
    }

    return s;
}

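/* All the combiners below share one loop shape: a scalar head until pd
 * reaches a 16-byte boundary, a 4-pixels-at-a-time SIMD body (aligned
 * destination stores, unaligned source/mask loads), and a scalar tail
 * for the leftovers.
 */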
static force_inline void
core_combine_over_u_sse2_mask (uint32_t *      pd,
                               const uint32_t* ps,
                               const uint32_t* pm,
                               int             w)
{
    uint32_t s, d;

    /* Align dst on a 16-byte boundary */
    while (w && ((uintptr_t)pd & 15))
    {
        d = *pd;
        s = combine1 (ps, pm);

        if (s)
            *pd = core_combine_over_u_pixel_sse2 (s, d);
        pd++;
        ps++;
        pm++;
        w--;
    }

    while (w >= 4)
    {
        __m128i mask = load_128_unaligned ((__m128i *)pm);

        if (!is_zero (mask))
        {
            __m128i src;
            __m128i src_hi, src_lo;
            __m128i mask_hi, mask_lo;
            __m128i alpha_hi, alpha_lo;

            src = load_128_unaligned ((__m128i *)ps);

            if (is_opaque (_mm_and_si128 (src, mask)))
            {
                save_128_aligned ((__m128i *)pd, src);
            }
            else
            {
                __m128i dst = load_128_aligned ((__m128i *)pd);
                __m128i dst_hi, dst_lo;

                unpack_128_2x128 (mask, &mask_lo, &mask_hi);
                unpack_128_2x128 (src, &src_lo, &src_hi);

                expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi);
                pix_multiply_2x128 (&src_lo, &src_hi,
                                    &mask_lo, &mask_hi,
                                    &src_lo, &src_hi);

                unpack_128_2x128 (dst, &dst_lo, &dst_hi);

                expand_alpha_2x128 (src_lo, src_hi,
                                    &alpha_lo, &alpha_hi);

                over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
                            &dst_lo, &dst_hi);

                save_128_aligned (
                    (__m128i *)pd,
                    pack_2x128_128 (dst_lo, dst_hi));
            }
        }

        pm += 4;
        ps += 4;
        pd += 4;
        w -= 4;
    }
    while (w)
    {
        d = *pd;
        s = combine1 (ps, pm);

        if (s)
            *pd = core_combine_over_u_pixel_sse2 (s, d);
        pd++;
        ps++;
        pm++;

        w--;
    }
}

641 | ||||
642 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | |||
643 | core_combine_over_u_sse2_no_mask (uint32_t * pd, | |||
644 | const uint32_t* ps, | |||
645 | int w) | |||
646 | { | |||
647 | uint32_t s, d; | |||
648 | ||||
649 | /* Align dst on a 16-byte boundary */ | |||
650 | while (w && ((uintptr_t)pd & 15)) | |||
651 | { | |||
652 | d = *pd; | |||
653 | s = *ps; | |||
654 | ||||
655 | if (s) | |||
656 | *pd = core_combine_over_u_pixel_sse2 (s, d); | |||
657 | pd++; | |||
658 | ps++; | |||
659 | w--; | |||
660 | } | |||
661 | ||||
662 | while (w >= 4) | |||
663 | { | |||
664 | __m128i src; | |||
665 | __m128i src_hi, src_lo, dst_hi, dst_lo; | |||
666 | __m128i alpha_hi, alpha_lo; | |||
667 | ||||
668 | src = load_128_unaligned ((__m128i *)ps); | |||
669 | ||||
670 | if (!is_zero (src)) | |||
671 | { | |||
672 | if (is_opaque (src)) | |||
673 | { | |||
674 | save_128_aligned ((__m128i *)pd, src); | |||
675 | } | |||
676 | else | |||
677 | { | |||
678 | __m128i dst = load_128_aligned ((__m128i *)pd); | |||
679 | ||||
680 | unpack_128_2x128 (src, &src_lo, &src_hi); | |||
681 | unpack_128_2x128 (dst, &dst_lo, &dst_hi); | |||
682 | ||||
683 | expand_alpha_2x128 (src_lo, src_hi, | |||
684 | &alpha_lo, &alpha_hi); | |||
685 | over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi, | |||
686 | &dst_lo, &dst_hi); | |||
687 | ||||
688 | save_128_aligned ( | |||
689 | (__m128i *)pd, | |||
690 | pack_2x128_128 (dst_lo, dst_hi)); | |||
691 | } | |||
692 | } | |||
693 | ||||
694 | ps += 4; | |||
695 | pd += 4; | |||
696 | w -= 4; | |||
697 | } | |||
698 | while (w) | |||
699 | { | |||
700 | d = *pd; | |||
701 | s = *ps; | |||
702 | ||||
703 | if (s) | |||
704 | *pd = core_combine_over_u_pixel_sse2 (s, d); | |||
705 | pd++; | |||
706 | ps++; | |||
707 | ||||
708 | w--; | |||
709 | } | |||
710 | } | |||
711 | ||||
712 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | |||
713 | sse2_combine_over_u (pixman_implementation_t *imp, | |||
714 | pixman_op_t op, | |||
715 | uint32_t * pd, | |||
716 | const uint32_t * ps, | |||
717 | const uint32_t * pm, | |||
718 | int w) | |||
719 | { | |||
720 | if (pm) | |||
721 | core_combine_over_u_sse2_mask (pd, ps, pm, w); | |||
722 | else | |||
723 | core_combine_over_u_sse2_no_mask (pd, ps, w); | |||
724 | } | |||
725 | ||||
static void
sse2_combine_over_reverse_u (pixman_implementation_t *imp,
                             pixman_op_t              op,
                             uint32_t *               pd,
                             const uint32_t *         ps,
                             const uint32_t *         pm,
                             int                      w)
{
    uint32_t s, d;

    __m128i xmm_dst_lo, xmm_dst_hi;
    __m128i xmm_src_lo, xmm_src_hi;
    __m128i xmm_alpha_lo, xmm_alpha_hi;

    /* Align dst on a 16-byte boundary */
    while (w &&
           ((uintptr_t)pd & 15))
    {
        d = *pd;
        s = combine1 (ps, pm);

        *pd++ = core_combine_over_u_pixel_sse2 (d, s);
        w--;
        ps++;
        if (pm)
            pm++;
    }

    while (w >= 4)
    {
        /* I'm loading unaligned because I'm not sure
         * about the address alignment.
         */
        xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
        xmm_dst_hi = load_128_aligned ((__m128i*) pd);

        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);

        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
                            &xmm_alpha_lo, &xmm_alpha_hi);

        over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
                    &xmm_alpha_lo, &xmm_alpha_hi,
                    &xmm_src_lo, &xmm_src_hi);

        /* rebuild the 4 pixel data and save */
        save_128_aligned ((__m128i*)pd,
                          pack_2x128_128 (xmm_src_lo, xmm_src_hi));

        w -= 4;
        ps += 4;
        pd += 4;

        if (pm)
            pm += 4;
    }

    while (w)
    {
        d = *pd;
        s = combine1 (ps, pm);

        *pd++ = core_combine_over_u_pixel_sse2 (d, s);
        ps++;
        w--;
        if (pm)
            pm++;
    }
}

static force_inline uint32_t
core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst)
{
    uint32_t maska = src >> 24;

    if (maska == 0)
    {
        return 0;
    }
    else if (maska != 0xff)
    {
        return pack_1x128_32 (
            pix_multiply_1x128 (unpack_32_1x128 (dst),
                                expand_alpha_1x128 (unpack_32_1x128 (src))));
    }

    return dst;
}

static void
sse2_combine_in_u (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               pd,
                   const uint32_t *         ps,
                   const uint32_t *         pm,
                   int                      w)
{
    uint32_t s, d;

    __m128i xmm_src_lo, xmm_src_hi;
    __m128i xmm_dst_lo, xmm_dst_hi;

    while (w && ((uintptr_t)pd & 15))
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_in_u_pixel_sse2 (d, s);
        w--;
        ps++;
        if (pm)
            pm++;
    }

    while (w >= 4)
    {
        xmm_dst_hi = load_128_aligned ((__m128i*) pd);
        xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);

        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);

        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
                            &xmm_dst_lo, &xmm_dst_hi,
                            &xmm_dst_lo, &xmm_dst_hi);

        save_128_aligned ((__m128i*)pd,
                          pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));

        ps += 4;
        pd += 4;
        w -= 4;
        if (pm)
            pm += 4;
    }

    while (w)
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_in_u_pixel_sse2 (d, s);
        w--;
        ps++;
        if (pm)
            pm++;
    }
}

static void
sse2_combine_in_reverse_u (pixman_implementation_t *imp,
                           pixman_op_t              op,
                           uint32_t *               pd,
                           const uint32_t *         ps,
                           const uint32_t *         pm,
                           int                      w)
{
    uint32_t s, d;

    __m128i xmm_src_lo, xmm_src_hi;
    __m128i xmm_dst_lo, xmm_dst_hi;

    while (w && ((uintptr_t)pd & 15))
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_in_u_pixel_sse2 (s, d);
        ps++;
        w--;
        if (pm)
            pm++;
    }

    while (w >= 4)
    {
        xmm_dst_hi = load_128_aligned ((__m128i*) pd);
        xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);

        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);

        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
        pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
                            &xmm_src_lo, &xmm_src_hi,
                            &xmm_dst_lo, &xmm_dst_hi);

        save_128_aligned (
            (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));

        ps += 4;
        pd += 4;
        w -= 4;
        if (pm)
            pm += 4;
    }

    while (w)
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_in_u_pixel_sse2 (s, d);
        w--;
        ps++;
        if (pm)
            pm++;
    }
}

static void
sse2_combine_out_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               pd,
                            const uint32_t *         ps,
                            const uint32_t *         pm,
                            int                      w)
{
    while (w && ((uintptr_t)pd & 15))
    {
        uint32_t s = combine1 (ps, pm);
        uint32_t d = *pd;

        *pd++ = pack_1x128_32 (
            pix_multiply_1x128 (
                unpack_32_1x128 (d), negate_1x128 (
                    expand_alpha_1x128 (unpack_32_1x128 (s)))));

        if (pm)
            pm++;
        ps++;
        w--;
    }

    while (w >= 4)
    {
        __m128i xmm_src_lo, xmm_src_hi;
        __m128i xmm_dst_lo, xmm_dst_hi;

        xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
        xmm_dst_hi = load_128_aligned ((__m128i*) pd);

        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);

        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
        negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);

        pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
                            &xmm_src_lo, &xmm_src_hi,
                            &xmm_dst_lo, &xmm_dst_hi);

        save_128_aligned (
            (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));

        ps += 4;
        pd += 4;
        if (pm)
            pm += 4;

        w -= 4;
    }

    while (w)
    {
        uint32_t s = combine1 (ps, pm);
        uint32_t d = *pd;

        *pd++ = pack_1x128_32 (
            pix_multiply_1x128 (
                unpack_32_1x128 (d), negate_1x128 (
                    expand_alpha_1x128 (unpack_32_1x128 (s)))));
        ps++;
        if (pm)
            pm++;
        w--;
    }
}

static void
sse2_combine_out_u (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               pd,
                    const uint32_t *         ps,
                    const uint32_t *         pm,
                    int                      w)
{
    while (w && ((uintptr_t)pd & 15))
    {
        uint32_t s = combine1 (ps, pm);
        uint32_t d = *pd;

        *pd++ = pack_1x128_32 (
            pix_multiply_1x128 (
                unpack_32_1x128 (s), negate_1x128 (
                    expand_alpha_1x128 (unpack_32_1x128 (d)))));
        w--;
        ps++;
        if (pm)
            pm++;
    }

    while (w >= 4)
    {
        __m128i xmm_src_lo, xmm_src_hi;
        __m128i xmm_dst_lo, xmm_dst_hi;

        xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
        xmm_dst_hi = load_128_aligned ((__m128i*) pd);

        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);

        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
        negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);

        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
                            &xmm_dst_lo, &xmm_dst_hi,
                            &xmm_dst_lo, &xmm_dst_hi);

        save_128_aligned (
            (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));

        ps += 4;
        pd += 4;
        w -= 4;
        if (pm)
            pm += 4;
    }

    while (w)
    {
        uint32_t s = combine1 (ps, pm);
        uint32_t d = *pd;

        *pd++ = pack_1x128_32 (
            pix_multiply_1x128 (
                unpack_32_1x128 (s), negate_1x128 (
                    expand_alpha_1x128 (unpack_32_1x128 (d)))));
        w--;
        ps++;
        if (pm)
            pm++;
    }
}

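/* ATOP: dst = src * alpha_dst + dst * (1 - alpha_src), both products
 * computed with the rounded multiply and combined with a saturating add.
 */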
static force_inline uint32_t
core_combine_atop_u_pixel_sse2 (uint32_t src,
                                uint32_t dst)
{
    __m128i s = unpack_32_1x128 (src);
    __m128i d = unpack_32_1x128 (dst);

    __m128i sa = negate_1x128 (expand_alpha_1x128 (s));
    __m128i da = expand_alpha_1x128 (d);

    return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
}

static void
sse2_combine_atop_u (pixman_implementation_t *imp,
                     pixman_op_t              op,
                     uint32_t *               pd,
                     const uint32_t *         ps,
                     const uint32_t *         pm,
                     int                      w)
{
    uint32_t s, d;

    __m128i xmm_src_lo, xmm_src_hi;
    __m128i xmm_dst_lo, xmm_dst_hi;
    __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
    __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;

    while (w && ((uintptr_t)pd & 15))
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
        w--;
        ps++;
        if (pm)
            pm++;
    }

    while (w >= 4)
    {
        xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
        xmm_dst_hi = load_128_aligned ((__m128i*) pd);

        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);

        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
                            &xmm_alpha_src_lo, &xmm_alpha_src_hi);
        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
                            &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);

        negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
                      &xmm_alpha_src_lo, &xmm_alpha_src_hi);

        pix_add_multiply_2x128 (
            &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
            &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
            &xmm_dst_lo, &xmm_dst_hi);

        save_128_aligned (
            (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));

        ps += 4;
        pd += 4;
        w -= 4;
        if (pm)
            pm += 4;
    }

    while (w)
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
        w--;
        ps++;
        if (pm)
            pm++;
    }
}

static force_inline uint32_t
core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
                                        uint32_t dst)
{
    __m128i s = unpack_32_1x128 (src);
    __m128i d = unpack_32_1x128 (dst);

    __m128i sa = expand_alpha_1x128 (s);
    __m128i da = negate_1x128 (expand_alpha_1x128 (d));

    return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
}

static void
sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
                             pixman_op_t              op,
                             uint32_t *               pd,
                             const uint32_t *         ps,
                             const uint32_t *         pm,
                             int                      w)
{
    uint32_t s, d;

    __m128i xmm_src_lo, xmm_src_hi;
    __m128i xmm_dst_lo, xmm_dst_hi;
    __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
    __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;

    while (w && ((uintptr_t)pd & 15))
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
        ps++;
        w--;
        if (pm)
            pm++;
    }

    while (w >= 4)
    {
        xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
        xmm_dst_hi = load_128_aligned ((__m128i*) pd);

        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);

        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
                            &xmm_alpha_src_lo, &xmm_alpha_src_hi);
        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
                            &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);

        negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
                      &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);

        pix_add_multiply_2x128 (
            &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
            &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
            &xmm_dst_lo, &xmm_dst_hi);

        save_128_aligned (
            (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));

        ps += 4;
        pd += 4;
        w -= 4;
        if (pm)
            pm += 4;
    }

    while (w)
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
        ps++;
        w--;
        if (pm)
            pm++;
    }
}

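/* XOR: dst = src * (1 - alpha_dst) + dst * (1 - alpha_src). */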
static force_inline uint32_t
core_combine_xor_u_pixel_sse2 (uint32_t src,
                               uint32_t dst)
{
    __m128i s = unpack_32_1x128 (src);
    __m128i d = unpack_32_1x128 (dst);

    __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d));
    __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s));

    return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
}

static void
sse2_combine_xor_u (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dst,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int w = width;
    uint32_t s, d;
    uint32_t* pd = dst;
    const uint32_t* ps = src;
    const uint32_t* pm = mask;

    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
    __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
    __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;

    while (w && ((uintptr_t)pd & 15))
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
        w--;
        ps++;
        if (pm)
            pm++;
    }

    while (w >= 4)
    {
        xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
        xmm_dst = load_128_aligned ((__m128i*) pd);

        unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
        unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);

        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
                            &xmm_alpha_src_lo, &xmm_alpha_src_hi);
        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
                            &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);

        negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
                      &xmm_alpha_src_lo, &xmm_alpha_src_hi);
        negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
                      &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);

        pix_add_multiply_2x128 (
            &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
            &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
            &xmm_dst_lo, &xmm_dst_hi);

        save_128_aligned (
            (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));

        ps += 4;
        pd += 4;
        w -= 4;
        if (pm)
            pm += 4;
    }

    while (w)
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
        w--;
        ps++;
        if (pm)
            pm++;
    }
}

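/* ADD is a plain saturating byte add; no unpacking is needed because
 * there is no per-channel multiply involved.
 */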
static force_inline void
sse2_combine_add_u (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dst,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int w = width;
    uint32_t s, d;
    uint32_t* pd = dst;
    const uint32_t* ps = src;
    const uint32_t* pm = mask;

    while (w && (uintptr_t)pd & 15)
    {
        s = combine1 (ps, pm);
        d = *pd;

        ps++;
        if (pm)
            pm++;
        *pd++ = _mm_cvtsi128_si32 (
            _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
        w--;
    }

    while (w >= 4)
    {
        __m128i s;

        s = combine4 ((__m128i*)ps, (__m128i*)pm);

        save_128_aligned (
            (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd)));

        pd += 4;
        ps += 4;
        if (pm)
            pm += 4;
        w -= 4;
    }

    while (w--)
    {
        s = combine1 (ps, pm);
        d = *pd;

        ps++;
        *pd++ = _mm_cvtsi128_si32 (
            _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
        if (pm)
            pm++;
    }
}

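/* SATURATE: if the source alpha exceeds what the destination can still
 * absorb (~dst >> 24), first scale the source by da/sa (DIV_UN8 is
 * pixman's rounded byte division), then add with 16-bit saturation.
 */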
static force_inline uint32_t
core_combine_saturate_u_pixel_sse2 (uint32_t src,
                                    uint32_t dst)
{
    __m128i ms = unpack_32_1x128 (src);
    __m128i md = unpack_32_1x128 (dst);
    uint32_t sa = src >> 24;
    uint32_t da = ~dst >> 24;

    if (sa > da)
    {
        ms = pix_multiply_1x128 (
            ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24)));
    }

    return pack_1x128_32 (_mm_adds_epu16 (md, ms));
}

static void
sse2_combine_saturate_u (pixman_implementation_t *imp,
                         pixman_op_t              op,
                         uint32_t *               pd,
                         const uint32_t *         ps,
                         const uint32_t *         pm,
                         int                      w)
{
    uint32_t s, d;

    uint32_t pack_cmp;
    __m128i xmm_src, xmm_dst;

    while (w && (uintptr_t)pd & 15)
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
        w--;
        ps++;
        if (pm)
            pm++;
    }

    while (w >= 4)
    {
        xmm_dst = load_128_aligned ((__m128i*)pd);
        xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);

        pack_cmp = _mm_movemask_epi8 (
            _mm_cmpgt_epi32 (
                _mm_srli_epi32 (xmm_src, 24),
                _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));

        /* if some alpha src is greater than the respective ~alpha dst */
        if (pack_cmp)
        {
            s = combine1 (ps++, pm);
            d = *pd;
            *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
            if (pm)
                pm++;

            s = combine1 (ps++, pm);
            d = *pd;
            *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
            if (pm)
                pm++;

            s = combine1 (ps++, pm);
            d = *pd;
            *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
            if (pm)
                pm++;

            s = combine1 (ps++, pm);
            d = *pd;
            *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
            if (pm)
                pm++;
        }
        else
        {
            save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));

            pd += 4;
            ps += 4;
            if (pm)
                pm += 4;
        }

        w -= 4;
    }

    while (w--)
    {
        s = combine1 (ps, pm);
        d = *pd;

        *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
        ps++;
        if (pm)
            pm++;
    }
}

static void
sse2_combine_src_ca (pixman_implementation_t *imp,
                     pixman_op_t              op,
                     uint32_t *               pd,
                     const uint32_t *         ps,
                     const uint32_t *         pm,
                     int                      w)
{
    uint32_t s, m;

    __m128i xmm_src_lo, xmm_src_hi;
    __m128i xmm_mask_lo, xmm_mask_hi;
    __m128i xmm_dst_lo, xmm_dst_hi;

    while (w && (uintptr_t)pd & 15)
    {
        s = *ps++;
        m = *pm++;
        *pd++ = pack_1x128_32 (
            pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
        w--;
    }

    while (w >= 4)
    {
        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);

        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);

        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
                            &xmm_mask_lo, &xmm_mask_hi,
                            &xmm_dst_lo, &xmm_dst_hi);

        save_128_aligned (
            (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));

        ps += 4;
        pd += 4;
        pm += 4;
        w -= 4;
    }

    while (w)
    {
        s = *ps++;
        m = *pm++;
        *pd++ = pack_1x128_32 (
            pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
        w--;
    }
}

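/* The _ca ("component alpha") combiners take a full argb mask per pixel
 * and apply it per channel rather than as a single alpha value; note
 * they dereference pm unconditionally, so pm is never NULL here.
 */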
1547 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | |||
1548 | core_combine_over_ca_pixel_sse2 (uint32_t src, | |||
1549 | uint32_t mask, | |||
1550 | uint32_t dst) | |||
1551 | { | |||
1552 | __m128i s = unpack_32_1x128 (src); | |||
1553 | __m128i expAlpha = expand_alpha_1x128 (s); | |||
1554 | __m128i unpk_mask = unpack_32_1x128 (mask); | |||
1555 | __m128i unpk_dst = unpack_32_1x128 (dst); | |||
1556 | ||||
1557 | return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst)); | |||
1558 | } | |||
1559 | ||||
1560 | static void | |||
1561 | sse2_combine_over_ca (pixman_implementation_t *imp, | |||
1562 | pixman_op_t op, | |||
1563 | uint32_t * pd, | |||
1564 | const uint32_t * ps, | |||
1565 | const uint32_t * pm, | |||
1566 | int w) | |||
1567 | { | |||
1568 | uint32_t s, m, d; | |||
1569 | ||||
1570 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
1571 | __m128i xmm_src_lo, xmm_src_hi; | |||
1572 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
1573 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
1574 | ||||
1575 | while (w && (uintptr_t)pd & 15) | |||
1576 | { | |||
1577 | s = *ps++; | |||
1578 | m = *pm++; | |||
1579 | d = *pd; | |||
1580 | ||||
1581 | *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); | |||
1582 | w--; | |||
1583 | } | |||
1584 | ||||
1585 | while (w >= 4) | |||
1586 | { | |||
1587 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | |||
1588 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | |||
1589 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | |||
1590 | ||||
1591 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
1592 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
1593 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
1594 | ||||
1595 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
1596 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
1597 | ||||
1598 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
1599 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
1600 | &xmm_mask_lo, &xmm_mask_hi, | |||
1601 | &xmm_dst_lo, &xmm_dst_hi); | |||
1602 | ||||
1603 | save_128_aligned ( | |||
1604 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
1605 | ||||
1606 | ps += 4; | |||
1607 | pd += 4; | |||
1608 | pm += 4; | |||
1609 | w -= 4; | |||
1610 | } | |||
1611 | ||||
1612 | while (w) | |||
1613 | { | |||
1614 | s = *ps++; | |||
1615 | m = *pm++; | |||
1616 | d = *pd; | |||
1617 | ||||
1618 | *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); | |||
1619 | w--; | |||
1620 | } | |||
1621 | } | |||
1622 | ||||
1623 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | |||
1624 | core_combine_over_reverse_ca_pixel_sse2 (uint32_t src, | |||
1625 | uint32_t mask, | |||
1626 | uint32_t dst) | |||
1627 | { | |||
1628 | __m128i d = unpack_32_1x128 (dst); | |||
1629 | ||||
1630 | return pack_1x128_32 ( | |||
1631 | over_1x128 (d, expand_alpha_1x128 (d), | |||
1632 | pix_multiply_1x128 (unpack_32_1x128 (src), | |||
1633 | unpack_32_1x128 (mask)))); | |||
1634 | } | |||
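/* Editor's note: OVER_REVERSE composites the existing destination over
 * the masked source, so per channel:
 *
 *     dest = dest + (1 - destAlpha) * (src * mask)
 */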
1635 | ||||
1636 | static void | |||
1637 | sse2_combine_over_reverse_ca (pixman_implementation_t *imp, | |||
1638 | pixman_op_t op, | |||
1639 | uint32_t * pd, | |||
1640 | const uint32_t * ps, | |||
1641 | const uint32_t * pm, | |||
1642 | int w) | |||
1643 | { | |||
1644 | uint32_t s, m, d; | |||
1645 | ||||
1646 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
1647 | __m128i xmm_src_lo, xmm_src_hi; | |||
1648 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
1649 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
1650 | ||||
1651 | while (w && (uintptr_t)pd & 15) | |||
1652 | { | |||
1653 | s = *ps++; | |||
1654 | m = *pm++; | |||
1655 | d = *pd; | |||
1656 | ||||
1657 | *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); | |||
1658 | w--; | |||
1659 | } | |||
1660 | ||||
1661 | while (w >= 4) | |||
1662 | { | |||
1663 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | |||
1664 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | |||
1665 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | |||
1666 | ||||
1667 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
1668 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
1669 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
1670 | ||||
1671 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | |||
1672 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
1673 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
1674 | &xmm_mask_lo, &xmm_mask_hi, | |||
1675 | &xmm_mask_lo, &xmm_mask_hi); | |||
1676 | ||||
1677 | over_2x128 (&xmm_dst_lo, &xmm_dst_hi, | |||
1678 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
1679 | &xmm_mask_lo, &xmm_mask_hi); | |||
1680 | ||||
1681 | save_128_aligned ( | |||
1682 | (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); | |||
1683 | ||||
1684 | ps += 4; | |||
1685 | pd += 4; | |||
1686 | pm += 4; | |||
1687 | w -= 4; | |||
1688 | } | |||
1689 | ||||
1690 | while (w) | |||
1691 | { | |||
1692 | s = *ps++; | |||
1693 | m = *pm++; | |||
1694 | d = *pd; | |||
1695 | ||||
1696 | *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); | |||
1697 | w--; | |||
1698 | } | |||
1699 | } | |||
1700 | ||||
1701 | static void | |||
1702 | sse2_combine_in_ca (pixman_implementation_t *imp, | |||
1703 | pixman_op_t op, | |||
1704 | uint32_t * pd, | |||
1705 | const uint32_t * ps, | |||
1706 | const uint32_t * pm, | |||
1707 | int w) | |||
1708 | { | |||
1709 | uint32_t s, m, d; | |||
1710 | ||||
1711 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
1712 | __m128i xmm_src_lo, xmm_src_hi; | |||
1713 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
1714 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
1715 | ||||
1716 | while (w && (uintptr_t)pd & 15) | |||
1717 | { | |||
1718 | s = *ps++; | |||
1719 | m = *pm++; | |||
1720 | d = *pd; | |||
1721 | ||||
1722 | *pd++ = pack_1x128_32 ( | |||
1723 | pix_multiply_1x128 ( | |||
1724 | pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)), | |||
1725 | expand_alpha_1x128 (unpack_32_1x128 (d)))); | |||
1726 | ||||
1727 | w--; | |||
1728 | } | |||
1729 | ||||
1730 | while (w >= 4) | |||
1731 | { | |||
1732 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | |||
1733 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | |||
1734 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | |||
1735 | ||||
1736 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
1737 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
1738 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
1739 | ||||
1740 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | |||
1741 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
1742 | ||||
1743 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
1744 | &xmm_mask_lo, &xmm_mask_hi, | |||
1745 | &xmm_dst_lo, &xmm_dst_hi); | |||
1746 | ||||
1747 | pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, | |||
1748 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
1749 | &xmm_dst_lo, &xmm_dst_hi); | |||
1750 | ||||
1751 | save_128_aligned ( | |||
1752 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
1753 | ||||
1754 | ps += 4; | |||
1755 | pd += 4; | |||
1756 | pm += 4; | |||
1757 | w -= 4; | |||
1758 | } | |||
1759 | ||||
1760 | while (w) | |||
1761 | { | |||
1762 | s = *ps++; | |||
1763 | m = *pm++; | |||
1764 | d = *pd; | |||
1765 | ||||
1766 | *pd++ = pack_1x128_32 ( | |||
1767 | pix_multiply_1x128 ( | |||
1768 | pix_multiply_1x128 ( | |||
1769 | unpack_32_1x128 (s), unpack_32_1x128 (m)), | |||
1770 | expand_alpha_1x128 (unpack_32_1x128 (d)))); | |||
1771 | ||||
1772 | w--; | |||
1773 | } | |||
1774 | } | |||
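/* Editor's note: component-alpha IN keeps the masked source only where
 * the destination is:  dest = (src * mask) * destAlpha.  The
 * IN_REVERSE combiner that follows applies the symmetric
 * dest = dest * (mask * srcAlpha). */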
1775 | ||||
1776 | static void | |||
1777 | sse2_combine_in_reverse_ca (pixman_implementation_t *imp, | |||
1778 | pixman_op_t op, | |||
1779 | uint32_t * pd, | |||
1780 | const uint32_t * ps, | |||
1781 | const uint32_t * pm, | |||
1782 | int w) | |||
1783 | { | |||
1784 | uint32_t s, m, d; | |||
1785 | ||||
1786 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
1787 | __m128i xmm_src_lo, xmm_src_hi; | |||
1788 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
1789 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
1790 | ||||
1791 | while (w && (uintptr_t)pd & 15) | |||
1792 | { | |||
1793 | s = *ps++; | |||
1794 | m = *pm++; | |||
1795 | d = *pd; | |||
1796 | ||||
1797 | *pd++ = pack_1x128_32 ( | |||
1798 | pix_multiply_1x128 ( | |||
1799 | unpack_32_1x128 (d), | |||
1800 | pix_multiply_1x128 (unpack_32_1x128 (m), | |||
1801 | expand_alpha_1x128 (unpack_32_1x128 (s))))); | |||
1802 | w--; | |||
1803 | } | |||
1804 | ||||
1805 | while (w >= 4) | |||
1806 | { | |||
1807 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | |||
1808 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | |||
1809 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | |||
1810 | ||||
1811 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
1812 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
1813 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
1814 | ||||
1815 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
1816 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
1817 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | |||
1818 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
1819 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
1820 | ||||
1821 | pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, | |||
1822 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
1823 | &xmm_dst_lo, &xmm_dst_hi); | |||
1824 | ||||
1825 | save_128_aligned ( | |||
1826 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
1827 | ||||
1828 | ps += 4; | |||
1829 | pd += 4; | |||
1830 | pm += 4; | |||
1831 | w -= 4; | |||
1832 | } | |||
1833 | ||||
1834 | while (w) | |||
1835 | { | |||
1836 | s = *ps++; | |||
1837 | m = *pm++; | |||
1838 | d = *pd; | |||
1839 | ||||
1840 | *pd++ = pack_1x128_32 ( | |||
1841 | pix_multiply_1x128 ( | |||
1842 | unpack_32_1x128 (d), | |||
1843 | pix_multiply_1x128 (unpack_32_1x128 (m), | |||
1844 | expand_alpha_1x128 (unpack_32_1x128 (s))))); | |||
1845 | w--; | |||
1846 | } | |||
1847 | } | |||
1848 | ||||
1849 | static void | |||
1850 | sse2_combine_out_ca (pixman_implementation_t *imp, | |||
1851 | pixman_op_t op, | |||
1852 | uint32_t * pd, | |||
1853 | const uint32_t * ps, | |||
1854 | const uint32_t * pm, | |||
1855 | int w) | |||
1856 | { | |||
1857 | uint32_t s, m, d; | |||
1858 | ||||
1859 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
1860 | __m128i xmm_src_lo, xmm_src_hi; | |||
1861 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
1862 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
1863 | ||||
1864 | while (w && (uintptr_t)pd & 15) | |||
1865 | { | |||
1866 | s = *ps++; | |||
1867 | m = *pm++; | |||
1868 | d = *pd; | |||
1869 | ||||
1870 | *pd++ = pack_1x128_32 ( | |||
1871 | pix_multiply_1x128 ( | |||
1872 | pix_multiply_1x128 ( | |||
1873 | unpack_32_1x128 (s), unpack_32_1x128 (m)), | |||
1874 | negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d))))); | |||
1875 | w--; | |||
1876 | } | |||
1877 | ||||
1878 | while (w >= 4) | |||
1879 | { | |||
1880 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | |||
1881 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | |||
1882 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | |||
1883 | ||||
1884 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
1885 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
1886 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
1887 | ||||
1888 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | |||
1889 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
1890 | negate_2x128 (xmm_alpha_lo, xmm_alpha_hi, | |||
1891 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
1892 | ||||
1893 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
1894 | &xmm_mask_lo, &xmm_mask_hi, | |||
1895 | &xmm_dst_lo, &xmm_dst_hi); | |||
1896 | pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, | |||
1897 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
1898 | &xmm_dst_lo, &xmm_dst_hi); | |||
1899 | ||||
1900 | save_128_aligned ( | |||
1901 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
1902 | ||||
1903 | ps += 4; | |||
1904 | pd += 4; | |||
1905 | pm += 4; | |||
1906 | w -= 4; | |||
1907 | } | |||
1908 | ||||
1909 | while (w) | |||
1910 | { | |||
1911 | s = *ps++; | |||
1912 | m = *pm++; | |||
1913 | d = *pd; | |||
1914 | ||||
1915 | *pd++ = pack_1x128_32 ( | |||
1916 | pix_multiply_1x128 ( | |||
1917 | pix_multiply_1x128 ( | |||
1918 | unpack_32_1x128 (s), unpack_32_1x128 (m)), | |||
1919 | negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d))))); | |||
1920 | ||||
1921 | w--; | |||
1922 | } | |||
1923 | } | |||
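/* Editor's note: component-alpha OUT is the complement of IN,
 * dest = (src * mask) * (1 - destAlpha), hence the negate_2x128 on the
 * expanded destination alpha; OUT_REVERSE below computes
 * dest = dest * (1 - mask * srcAlpha). */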
1924 | ||||
1925 | static void | |||
1926 | sse2_combine_out_reverse_ca (pixman_implementation_t *imp, | |||
1927 | pixman_op_t op, | |||
1928 | uint32_t * pd, | |||
1929 | const uint32_t * ps, | |||
1930 | const uint32_t * pm, | |||
1931 | int w) | |||
1932 | { | |||
1933 | uint32_t s, m, d; | |||
1934 | ||||
1935 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
1936 | __m128i xmm_src_lo, xmm_src_hi; | |||
1937 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
1938 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
1939 | ||||
1940 | while (w && (uintptr_t)pd & 15) | |||
1941 | { | |||
1942 | s = *ps++; | |||
1943 | m = *pm++; | |||
1944 | d = *pd; | |||
1945 | ||||
1946 | *pd++ = pack_1x128_32 ( | |||
1947 | pix_multiply_1x128 ( | |||
1948 | unpack_32_1x128 (d), | |||
1949 | negate_1x128 (pix_multiply_1x128 ( | |||
1950 | unpack_32_1x128 (m), | |||
1951 | expand_alpha_1x128 (unpack_32_1x128 (s)))))); | |||
1952 | w--; | |||
1953 | } | |||
1954 | ||||
1955 | while (w >= 4) | |||
1956 | { | |||
1957 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | |||
1958 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | |||
1959 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | |||
1960 | ||||
1961 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
1962 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
1963 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
1964 | ||||
1965 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
1966 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
1967 | ||||
1968 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | |||
1969 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
1970 | &xmm_mask_lo, &xmm_mask_hi); | |||
1971 | ||||
1972 | negate_2x128 (xmm_mask_lo, xmm_mask_hi, | |||
1973 | &xmm_mask_lo, &xmm_mask_hi); | |||
1974 | ||||
1975 | pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, | |||
1976 | &xmm_mask_lo, &xmm_mask_hi, | |||
1977 | &xmm_dst_lo, &xmm_dst_hi); | |||
1978 | ||||
1979 | save_128_aligned ( | |||
1980 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
1981 | ||||
1982 | ps += 4; | |||
1983 | pd += 4; | |||
1984 | pm += 4; | |||
1985 | w -= 4; | |||
1986 | } | |||
1987 | ||||
1988 | while (w) | |||
1989 | { | |||
1990 | s = *ps++; | |||
1991 | m = *pm++; | |||
1992 | d = *pd; | |||
1993 | ||||
1994 | *pd++ = pack_1x128_32 ( | |||
1995 | pix_multiply_1x128 ( | |||
1996 | unpack_32_1x128 (d), | |||
1997 | negate_1x128 (pix_multiply_1x128 ( | |||
1998 | unpack_32_1x128 (m), | |||
1999 | expand_alpha_1x128 (unpack_32_1x128 (s)))))); | |||
2000 | w--; | |||
2001 | } | |||
2002 | } | |||
2003 | ||||
2004 | static force_inline uint32_t | |||
2005 | core_combine_atop_ca_pixel_sse2 (uint32_t src, | |||
2006 | uint32_t mask, | |||
2007 | uint32_t dst) | |||
2008 | { | |||
2009 | __m128i m = unpack_32_1x128 (mask); | |||
2010 | __m128i s = unpack_32_1x128 (src); | |||
2011 | __m128i d = unpack_32_1x128 (dst); | |||
2012 | __m128i sa = expand_alpha_1x128 (s); | |||
2013 | __m128i da = expand_alpha_1x128 (d); | |||
2014 | ||||
2015 | s = pix_multiply_1x128 (s, m); | |||
2016 | m = negate_1x128 (pix_multiply_1x128 (m, sa)); | |||
2017 | ||||
2018 | return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da)); | |||
2019 | } | |||
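/* Editor's note: this is component-alpha ATOP,
 *
 *     dest = src * mask * destAlpha + dest * (1 - srcAlpha * mask),
 *
 * folded into one pix_add_multiply_1x128 (d * m + s * da) after
 * precomputing s = src * mask and m = NOT (mask * srcAlpha).
 * ATOP_REVERSE below swaps which alpha term gets negated. */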
2020 | ||||
2021 | static void | |||
2022 | sse2_combine_atop_ca (pixman_implementation_t *imp, | |||
2023 | pixman_op_t op, | |||
2024 | uint32_t * pd, | |||
2025 | const uint32_t * ps, | |||
2026 | const uint32_t * pm, | |||
2027 | int w) | |||
2028 | { | |||
2029 | uint32_t s, m, d; | |||
2030 | ||||
2031 | __m128i xmm_src_lo, xmm_src_hi; | |||
2032 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
2033 | __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; | |||
2034 | __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; | |||
2035 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
2036 | ||||
2037 | while (w && (uintptr_t)pd & 15) | |||
2038 | { | |||
2039 | s = *ps++; | |||
2040 | m = *pm++; | |||
2041 | d = *pd; | |||
2042 | ||||
2043 | *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); | |||
2044 | w--; | |||
2045 | } | |||
2046 | ||||
2047 | while (w >= 4) | |||
2048 | { | |||
2049 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | |||
2050 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | |||
2051 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | |||
2052 | ||||
2053 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
2054 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
2055 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
2056 | ||||
2057 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
2058 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | |||
2059 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | |||
2060 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | |||
2061 | ||||
2062 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
2063 | &xmm_mask_lo, &xmm_mask_hi, | |||
2064 | &xmm_src_lo, &xmm_src_hi); | |||
2065 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | |||
2066 | &xmm_alpha_src_lo, &xmm_alpha_src_hi, | |||
2067 | &xmm_mask_lo, &xmm_mask_hi); | |||
2068 | ||||
2069 | negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
2070 | ||||
2071 | pix_add_multiply_2x128 ( | |||
2072 | &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, | |||
2073 | &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, | |||
2074 | &xmm_dst_lo, &xmm_dst_hi); | |||
2075 | ||||
2076 | save_128_aligned ( | |||
2077 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
2078 | ||||
2079 | ps += 4; | |||
2080 | pd += 4; | |||
2081 | pm += 4; | |||
2082 | w -= 4; | |||
2083 | } | |||
2084 | ||||
2085 | while (w) | |||
2086 | { | |||
2087 | s = *ps++; | |||
2088 | m = *pm++; | |||
2089 | d = *pd; | |||
2090 | ||||
2091 | *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); | |||
2092 | w--; | |||
2093 | } | |||
2094 | } | |||
2095 | ||||
2096 | static force_inline uint32_t | |||
2097 | core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src, | |||
2098 | uint32_t mask, | |||
2099 | uint32_t dst) | |||
2100 | { | |||
2101 | __m128i m = unpack_32_1x128 (mask); | |||
2102 | __m128i s = unpack_32_1x128 (src); | |||
2103 | __m128i d = unpack_32_1x128 (dst); | |||
2104 | ||||
2105 | __m128i da = negate_1x128 (expand_alpha_1x128 (d)); | |||
2106 | __m128i sa = expand_alpha_1x128 (s); | |||
2107 | ||||
2108 | s = pix_multiply_1x128 (s, m); | |||
2109 | m = pix_multiply_1x128 (m, sa); | |||
2110 | ||||
2111 | return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da)); | |||
2112 | } | |||
2113 | ||||
2114 | static void | |||
2115 | sse2_combine_atop_reverse_ca (pixman_implementation_t *imp, | |||
2116 | pixman_op_t op, | |||
2117 | uint32_t * pd, | |||
2118 | const uint32_t * ps, | |||
2119 | const uint32_t * pm, | |||
2120 | int w) | |||
2121 | { | |||
2122 | uint32_t s, m, d; | |||
2123 | ||||
2124 | __m128i xmm_src_lo, xmm_src_hi; | |||
2125 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
2126 | __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; | |||
2127 | __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; | |||
2128 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
2129 | ||||
2130 | while (w && (uintptr_t)pd & 15) | |||
2131 | { | |||
2132 | s = *ps++; | |||
2133 | m = *pm++; | |||
2134 | d = *pd; | |||
2135 | ||||
2136 | *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); | |||
2137 | w--; | |||
2138 | } | |||
2139 | ||||
2140 | while (w >= 4) | |||
2141 | { | |||
2142 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | |||
2143 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | |||
2144 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | |||
2145 | ||||
2146 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
2147 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
2148 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
2149 | ||||
2150 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
2151 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | |||
2152 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | |||
2153 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | |||
2154 | ||||
2155 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
2156 | &xmm_mask_lo, &xmm_mask_hi, | |||
2157 | &xmm_src_lo, &xmm_src_hi); | |||
2158 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | |||
2159 | &xmm_alpha_src_lo, &xmm_alpha_src_hi, | |||
2160 | &xmm_mask_lo, &xmm_mask_hi); | |||
2161 | ||||
2162 | negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, | |||
2163 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | |||
2164 | ||||
2165 | pix_add_multiply_2x128 ( | |||
2166 | &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, | |||
2167 | &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, | |||
2168 | &xmm_dst_lo, &xmm_dst_hi); | |||
2169 | ||||
2170 | save_128_aligned ( | |||
2171 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
2172 | ||||
2173 | ps += 4; | |||
2174 | pd += 4; | |||
2175 | pm += 4; | |||
2176 | w -= 4; | |||
2177 | } | |||
2178 | ||||
2179 | while (w) | |||
2180 | { | |||
2181 | s = *ps++; | |||
2182 | m = *pm++; | |||
2183 | d = *pd; | |||
2184 | ||||
2185 | *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); | |||
2186 | w--; | |||
2187 | } | |||
2188 | } | |||
2189 | ||||
2190 | static force_inline uint32_t | |||
2191 | core_combine_xor_ca_pixel_sse2 (uint32_t src, | |||
2192 | uint32_t mask, | |||
2193 | uint32_t dst) | |||
2194 | { | |||
2195 | __m128i a = unpack_32_1x128 (mask); | |||
2196 | __m128i s = unpack_32_1x128 (src); | |||
2197 | __m128i d = unpack_32_1x128 (dst); | |||
2198 | ||||
2199 | __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 ( | |||
2200 | a, expand_alpha_1x128 (s))); | |||
2201 | __m128i dest = pix_multiply_1x128 (s, a); | |||
2202 | __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d)); | |||
2203 | ||||
2204 | return pack_1x128_32 (pix_add_multiply_1x128 (&d, | |||
2205 | &alpha_dst, | |||
2206 | &dest, | |||
2207 | &alpha_src)); | |||
2208 | } | |||
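/* Editor's note: component-alpha XOR keeps only the non-overlapping
 * parts of source and destination:
 *
 *     dest = src * mask * (1 - destAlpha)
 *          + dest * (1 - srcAlpha * mask)
 *
 * The local names are misleading: alpha_dst holds the negated
 * mask * srcAlpha term, dest holds src * mask, and alpha_src holds the
 * negated destination alpha. */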
2209 | ||||
2210 | static void | |||
2211 | sse2_combine_xor_ca (pixman_implementation_t *imp, | |||
2212 | pixman_op_t op, | |||
2213 | uint32_t * pd, | |||
2214 | const uint32_t * ps, | |||
2215 | const uint32_t * pm, | |||
2216 | int w) | |||
2217 | { | |||
2218 | uint32_t s, m, d; | |||
2219 | ||||
2220 | __m128i xmm_src_lo, xmm_src_hi; | |||
2221 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
2222 | __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; | |||
2223 | __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; | |||
2224 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
2225 | ||||
2226 | while (w && (uintptr_t)pd & 15) | |||
2227 | { | |||
2228 | s = *ps++; | |||
2229 | m = *pm++; | |||
2230 | d = *pd; | |||
2231 | ||||
2232 | *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); | |||
2233 | w--; | |||
2234 | } | |||
2235 | ||||
2236 | while (w >= 4) | |||
2237 | { | |||
2238 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | |||
2239 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | |||
2240 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | |||
2241 | ||||
2242 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
2243 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
2244 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
2245 | ||||
2246 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
2247 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | |||
2248 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | |||
2249 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | |||
2250 | ||||
2251 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
2252 | &xmm_mask_lo, &xmm_mask_hi, | |||
2253 | &xmm_src_lo, &xmm_src_hi); | |||
2254 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | |||
2255 | &xmm_alpha_src_lo, &xmm_alpha_src_hi, | |||
2256 | &xmm_mask_lo, &xmm_mask_hi); | |||
2257 | ||||
2258 | negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, | |||
2259 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | |||
2260 | negate_2x128 (xmm_mask_lo, xmm_mask_hi, | |||
2261 | &xmm_mask_lo, &xmm_mask_hi); | |||
2262 | ||||
2263 | pix_add_multiply_2x128 ( | |||
2264 | &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, | |||
2265 | &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, | |||
2266 | &xmm_dst_lo, &xmm_dst_hi); | |||
2267 | ||||
2268 | save_128_aligned ( | |||
2269 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
2270 | ||||
2271 | ps += 4; | |||
2272 | pd += 4; | |||
2273 | pm += 4; | |||
2274 | w -= 4; | |||
2275 | } | |||
2276 | ||||
2277 | while (w) | |||
2278 | { | |||
2279 | s = *ps++; | |||
2280 | m = *pm++; | |||
2281 | d = *pd; | |||
2282 | ||||
2283 | *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); | |||
2284 | w--; | |||
2285 | } | |||
2286 | } | |||
2287 | ||||
2288 | static void | |||
2289 | sse2_combine_add_ca (pixman_implementation_t *imp, | |||
2290 | pixman_op_t op, | |||
2291 | uint32_t * pd, | |||
2292 | const uint32_t * ps, | |||
2293 | const uint32_t * pm, | |||
2294 | int w) | |||
2295 | { | |||
2296 | uint32_t s, m, d; | |||
2297 | ||||
2298 | __m128i xmm_src_lo, xmm_src_hi; | |||
2299 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
2300 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
2301 | ||||
2302 | while (w && (uintptr_t)pd & 15) | |||
2303 | { | |||
2304 | s = *ps++; | |||
2305 | m = *pm++; | |||
2306 | d = *pd; | |||
2307 | ||||
2308 | *pd++ = pack_1x128_32 ( | |||
2309 | _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s), | |||
2310 | unpack_32_1x128 (m)), | |||
2311 | unpack_32_1x128 (d))); | |||
2312 | w--; | |||
2313 | } | |||
2314 | ||||
2315 | while (w >= 4) | |||
2316 | { | |||
2317 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | |||
2318 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | |||
2319 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | |||
2320 | ||||
2321 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
2322 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
2323 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
2324 | ||||
2325 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
2326 | &xmm_mask_lo, &xmm_mask_hi, | |||
2327 | &xmm_src_lo, &xmm_src_hi); | |||
2328 | ||||
2329 | save_128_aligned ( | |||
2330 | (__m128i*)pd, pack_2x128_128 ( | |||
2331 | _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo), | |||
2332 | _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi))); | |||
2333 | ||||
2334 | ps += 4; | |||
2335 | pd += 4; | |||
2336 | pm += 4; | |||
2337 | w -= 4; | |||
2338 | } | |||
2339 | ||||
2340 | while (w) | |||
2341 | { | |||
2342 | s = *ps++; | |||
2343 | m = *pm++; | |||
2344 | d = *pd; | |||
2345 | ||||
2346 | *pd++ = pack_1x128_32 ( | |||
2347 | _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s), | |||
2348 | unpack_32_1x128 (m)), | |||
2349 | unpack_32_1x128 (d))); | |||
2350 | w--; | |||
2351 | } | |||
2352 | } | |||
2353 | ||||
2354 | static force_inline __m128i | |||
2355 | create_mask_16_128 (uint16_t mask) | |||
2356 | { | |||
2357 | return _mm_set1_epi16 (mask); | |||
2358 | } | |||
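/* Editor's note: _mm_set1_epi16 broadcasts the 16-bit value into all
 * eight lanes, so e.g. create_mask_16_128 (0x00ff) yields
 * { 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff }. */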
2359 | ||||
2360 | /* Work around a code generation bug in Sun Studio 12. */ | |||
2361 | #if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590) | |||
2362 | # define create_mask_2x32_128(mask0, mask1) \ | |||
2363 | (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1))) | |||
2364 | #else | |||
2365 | static force_inline __m128i | |||
2366 | create_mask_2x32_128 (uint32_t mask0, | |||
2367 | uint32_t mask1) | |||
2368 | { | |||
2369 | return _mm_set_epi32 (mask0, mask1, mask0, mask1); | |||
2370 | } | |||
2371 | #endif | |||
2372 | ||||
2373 | static void | |||
2374 | sse2_composite_over_n_8888 (pixman_implementation_t *imp, | |||
2375 | pixman_composite_info_t *info) | |||
2376 | { | |||
2377 | PIXMAN_COMPOSITE_ARGS (info); | |||
2378 | uint32_t src; | |||
2379 | uint32_t *dst_line, *dst, d; | |||
2380 | int32_t w; | |||
2381 | int dst_stride; | |||
2382 | __m128i xmm_src, xmm_alpha; | |||
2383 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
2384 | ||||
2385 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
2386 | ||||
2387 | if (src == 0) | |||
2388 | return; | |||
2389 | ||||
2390 | PIXMAN_IMAGE_GET_LINE ( | |||
2391 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
2392 | ||||
2393 | xmm_src = expand_pixel_32_1x128 (src); | |||
2394 | xmm_alpha = expand_alpha_1x128 (xmm_src); | |||
2395 | ||||
2396 | while (height--) | |||
2397 | { | |||
2398 | dst = dst_line; | |||
2399 | ||||
2400 | dst_line += dst_stride; | |||
2401 | w = width; | |||
2402 | ||||
2403 | while (w && (uintptr_t)dst & 15) | |||
2404 | { | |||
2405 | d = *dst; | |||
2406 | *dst++ = pack_1x128_32 (over_1x128 (xmm_src, | |||
2407 | xmm_alpha, | |||
2408 | unpack_32_1x128 (d))); | |||
2409 | w--; | |||
2410 | } | |||
2411 | ||||
2412 | while (w >= 4) | |||
2413 | { | |||
2414 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
2415 | ||||
2416 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
2417 | ||||
2418 | over_2x128 (&xmm_src, &xmm_src, | |||
2419 | &xmm_alpha, &xmm_alpha, | |||
2420 | &xmm_dst_lo, &xmm_dst_hi); | |||
2421 | ||||
2422 | /* rebuild the 4 pixel data and save */ | |||
2423 | save_128_aligned ( | |||
2424 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
2425 | ||||
2426 | w -= 4; | |||
2427 | dst += 4; | |||
2428 | } | |||
2429 | ||||
2430 | while (w) | |||
2431 | { | |||
2432 | d = *dst; | |||
2433 | *dst++ = pack_1x128_32 (over_1x128 (xmm_src, | |||
2434 | xmm_alpha, | |||
2435 | unpack_32_1x128 (d))); | |||
2436 | w--; | |||
2437 | } | |||
2438 | ||||
2439 | } | |||
2440 | } | |||
2441 | ||||
2442 | static void | |||
2443 | sse2_composite_over_n_0565 (pixman_implementation_t *imp, | |||
2444 | pixman_composite_info_t *info) | |||
2445 | { | |||
2446 | PIXMAN_COMPOSITE_ARGS (info); | |||
2447 | uint32_t src; | |||
2448 | uint16_t *dst_line, *dst, d; | |||
2449 | int32_t w; | |||
2450 | int dst_stride; | |||
2451 | __m128i xmm_src, xmm_alpha; | |||
2452 | __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; | |||
2453 | ||||
2454 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
2455 | ||||
2456 | if (src == 0) | |||
2457 | return; | |||
2458 | ||||
2459 | PIXMAN_IMAGE_GET_LINE ( | |||
2460 | dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); | |||
2461 | ||||
2462 | xmm_src = expand_pixel_32_1x128 (src); | |||
2463 | xmm_alpha = expand_alpha_1x128 (xmm_src); | |||
2464 | ||||
2465 | while (height--) | |||
2466 | { | |||
2467 | dst = dst_line; | |||
2468 | ||||
2469 | dst_line += dst_stride; | |||
2470 | w = width; | |||
2471 | ||||
2472 | while (w && (uintptr_t)dst & 15) | |||
2473 | { | |||
2474 | d = *dst; | |||
2475 | ||||
2476 | *dst++ = pack_565_32_16 ( | |||
2477 | pack_1x128_32 (over_1x128 (xmm_src, | |||
2478 | xmm_alpha, | |||
2479 | expand565_16_1x128 (d)))); | |||
2480 | w--; | |||
2481 | } | |||
2482 | ||||
2483 | while (w >= 8) | |||
2484 | { | |||
2485 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
2486 | ||||
2487 | unpack_565_128_4x128 (xmm_dst, | |||
2488 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | |||
2489 | ||||
2490 | over_2x128 (&xmm_src, &xmm_src, | |||
2491 | &xmm_alpha, &xmm_alpha, | |||
2492 | &xmm_dst0, &xmm_dst1); | |||
2493 | over_2x128 (&xmm_src, &xmm_src, | |||
2494 | &xmm_alpha, &xmm_alpha, | |||
2495 | &xmm_dst2, &xmm_dst3); | |||
2496 | ||||
2497 | xmm_dst = pack_565_4x128_128 ( | |||
2498 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | |||
2499 | ||||
2500 | save_128_aligned ((__m128i*)dst, xmm_dst); | |||
2501 | ||||
2502 | dst += 8; | |||
2503 | w -= 8; | |||
2504 | } | |||
2505 | ||||
2506 | while (w--) | |||
2507 | { | |||
2508 | d = *dst; | |||
2509 | *dst++ = pack_565_32_16 ( | |||
2510 | pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha, | |||
2511 | expand565_16_1x128 (d)))); | |||
2512 | } | |||
2513 | } | |||
2514 | ||||
2515 | } | |||
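/* Editor's note: the r5g6b5 destination is 16 bpp, so one aligned load
 * covers 8 pixels; unpack_565_128_4x128 widens them into four
 * registers of two unpacked 8888 pixels each, the shared over_2x128
 * helper is applied twice, and pack_565_4x128_128 narrows the result
 * back to r5g6b5. */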
2516 | ||||
2517 | static void | |||
2518 | sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, | |||
2519 | pixman_composite_info_t *info) | |||
2520 | { | |||
2521 | PIXMAN_COMPOSITE_ARGS (info); | |||
2522 | uint32_t src; | |||
2523 | uint32_t *dst_line, d; | |||
2524 | uint32_t *mask_line, m; | |||
2525 | uint32_t pack_cmp; | |||
2526 | int dst_stride, mask_stride; | |||
2527 | ||||
2528 | __m128i xmm_src; | |||
2529 | __m128i xmm_dst; | |||
2530 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
2531 | ||||
2532 | __m128i mmx_src, mmx_mask, mmx_dest; | |||
2533 | ||||
2534 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
2535 | ||||
2536 | if (src == 0) | |||
2537 | return; | |||
2538 | ||||
2539 | PIXMAN_IMAGE_GET_LINE ( | |||
2540 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
2541 | PIXMAN_IMAGE_GET_LINE ( | |||
2542 | mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); | |||
2543 | ||||
2544 | xmm_src = _mm_unpacklo_epi8 ( | |||
2545 | create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); | |||
2546 | mmx_src = xmm_src; | |||
2547 | ||||
2548 | while (height--) | |||
2549 | { | |||
2550 | int w = width; | |||
2551 | const uint32_t *pm = (uint32_t *)mask_line; | |||
2552 | uint32_t *pd = (uint32_t *)dst_line; | |||
2553 | ||||
2554 | dst_line += dst_stride; | |||
2555 | mask_line += mask_stride; | |||
2556 | ||||
2557 | while (w && (uintptr_t)pd & 15) | |||
2558 | { | |||
2559 | m = *pm++; | |||
2560 | ||||
2561 | if (m) | |||
2562 | { | |||
2563 | d = *pd; | |||
2564 | ||||
2565 | mmx_mask = unpack_32_1x128 (m); | |||
2566 | mmx_dest = unpack_32_1x128 (d); | |||
2567 | ||||
2568 | *pd = pack_1x128_32 ( | |||
2569 | _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), | |||
2570 | mmx_dest)); | |||
2571 | } | |||
2572 | ||||
2573 | pd++; | |||
2574 | w--; | |||
2575 | } | |||
2576 | ||||
2577 | while (w >= 4) | |||
2578 | { | |||
2579 | xmm_mask = load_128_unaligned ((__m128i*)pm); | |||
2580 | ||||
2581 | pack_cmp = | |||
2582 | _mm_movemask_epi8 ( | |||
2583 | _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); | |||
2584 | ||||
2585 | /* if all bits in mask are zero, pack_cmp is equal to 0xffff */ | |||
2586 | if (pack_cmp != 0xffff) | |||
2587 | { | |||
2588 | xmm_dst = load_128_aligned ((__m128i*)pd); | |||
2589 | ||||
2590 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
2591 | ||||
2592 | pix_multiply_2x128 (&xmm_src, &xmm_src, | |||
2593 | &xmm_mask_lo, &xmm_mask_hi, | |||
2594 | &xmm_mask_lo, &xmm_mask_hi); | |||
2595 | xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi); | |||
2596 | ||||
2597 | save_128_aligned ( | |||
2598 | (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst)); | |||
2599 | } | |||
2600 | ||||
2601 | pd += 4; | |||
2602 | pm += 4; | |||
2603 | w -= 4; | |||
2604 | } | |||
2605 | ||||
2606 | while (w) | |||
2607 | { | |||
2608 | m = *pm++; | |||
2609 | ||||
2610 | if (m) | |||
2611 | { | |||
2612 | d = *pd; | |||
2613 | ||||
2614 | mmx_mask = unpack_32_1x128 (m); | |||
2615 | mmx_dest = unpack_32_1x128 (d); | |||
2616 | ||||
2617 | *pd = pack_1x128_32 ( | |||
2618 | _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), | |||
2619 | mmx_dest)); | |||
2620 | } | |||
2621 | ||||
2622 | pd++; | |||
2623 | w--; | |||
2624 | } | |||
2625 | } | |||
2626 | ||||
2627 | } | |||
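/* Editor's note: the pack_cmp test above (and in the next routine) is
 * a cheap skip for fully transparent mask blocks.
 * _mm_cmpeq_epi32 (m, 0) sets a 32-bit lane to all ones iff that mask
 * pixel is zero, and _mm_movemask_epi8 gathers the 16 byte sign bits,
 * so pack_cmp == 0xffff exactly when all four mask pixels are zero and
 * the destination block can be left untouched. */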
2628 | ||||
2629 | static void | |||
2630 | sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, | |||
2631 | pixman_composite_info_t *info) | |||
2632 | { | |||
2633 | PIXMAN_COMPOSITE_ARGS (info); | |||
2634 | uint32_t src; | |||
2635 | uint32_t *dst_line, d; | |||
2636 | uint32_t *mask_line, m; | |||
2637 | uint32_t pack_cmp; | |||
2638 | int dst_stride, mask_stride; | |||
2639 | ||||
2640 | __m128i xmm_src, xmm_alpha; | |||
2641 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
2642 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
2643 | ||||
2644 | __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; | |||
2645 | ||||
2646 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
2647 | ||||
2648 | if (src == 0) | |||
2649 | return; | |||
2650 | ||||
2651 | PIXMAN_IMAGE_GET_LINE ( | |||
2652 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
2653 | PIXMAN_IMAGE_GET_LINE ( | |||
2654 | mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); | |||
2655 | ||||
2656 | xmm_src = _mm_unpacklo_epi8 ( | |||
2657 | create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); | |||
2658 | xmm_alpha = expand_alpha_1x128 (xmm_src); | |||
2659 | mmx_src = xmm_src; | |||
2660 | mmx_alpha = xmm_alpha; | |||
2661 | ||||
2662 | while (height--) | |||
2663 | { | |||
2664 | int w = width; | |||
2665 | const uint32_t *pm = (uint32_t *)mask_line; | |||
2666 | uint32_t *pd = (uint32_t *)dst_line; | |||
2667 | ||||
2668 | dst_line += dst_stride; | |||
2669 | mask_line += mask_stride; | |||
2670 | ||||
2671 | while (w && (uintptr_t)pd & 15) | |||
2672 | { | |||
2673 | m = *pm++; | |||
2674 | ||||
2675 | if (m) | |||
2676 | { | |||
2677 | d = *pd; | |||
2678 | mmx_mask = unpack_32_1x128 (m); | |||
2679 | mmx_dest = unpack_32_1x128 (d); | |||
2680 | ||||
2681 | *pd = pack_1x128_32 (in_over_1x128 (&mmx_src, | |||
2682 | &mmx_alpha, | |||
2683 | &mmx_mask, | |||
2684 | &mmx_dest)); | |||
2685 | } | |||
2686 | ||||
2687 | pd++; | |||
2688 | w--; | |||
2689 | } | |||
2690 | ||||
2691 | while (w >= 4) | |||
2692 | { | |||
2693 | xmm_mask = load_128_unaligned ((__m128i*)pm); | |||
2694 | ||||
2695 | pack_cmp = | |||
2696 | _mm_movemask_epi8 ( | |||
2697 | _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); | |||
2698 | ||||
2699 | /* if all bits in mask are zero, pack_cmp is equal to 0xffff */ | |||
2700 | if (pack_cmp != 0xffff) | |||
2701 | { | |||
2702 | xmm_dst = load_128_aligned ((__m128i*)pd); | |||
2703 | ||||
2704 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
2705 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
2706 | ||||
2707 | in_over_2x128 (&xmm_src, &xmm_src, | |||
2708 | &xmm_alpha, &xmm_alpha, | |||
2709 | &xmm_mask_lo, &xmm_mask_hi, | |||
2710 | &xmm_dst_lo, &xmm_dst_hi); | |||
2711 | ||||
2712 | save_128_aligned ( | |||
2713 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
2714 | } | |||
2715 | ||||
2716 | pd += 4; | |||
2717 | pm += 4; | |||
2718 | w -= 4; | |||
2719 | } | |||
2720 | ||||
2721 | while (w) | |||
2722 | { | |||
2723 | m = *pm++; | |||
2724 | ||||
2725 | if (m) | |||
2726 | { | |||
2727 | d = *pd; | |||
2728 | mmx_mask = unpack_32_1x128 (m); | |||
2729 | mmx_dest = unpack_32_1x128 (d); | |||
2730 | ||||
2731 | *pd = pack_1x128_32 ( | |||
2732 | in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)); | |||
2733 | } | |||
2734 | ||||
2735 | pd++; | |||
2736 | w--; | |||
2737 | } | |||
2738 | } | |||
2739 | ||||
2740 | } | |||
2741 | ||||
2742 | static void | |||
2743 | sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp, | |||
2744 | pixman_composite_info_t *info) | |||
2745 | { | |||
2746 | PIXMAN_COMPOSITE_ARGS (info); | |||
2747 | uint32_t *dst_line, *dst; | |||
2748 | uint32_t *src_line, *src; | |||
2749 | uint32_t mask; | |||
2750 | int32_t w; | |||
2751 | int dst_stride, src_stride; | |||
2752 | ||||
2753 | __m128i xmm_mask; | |||
2754 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | |||
2755 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
2756 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
2757 | ||||
2758 | PIXMAN_IMAGE_GET_LINE ( | |||
2759 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
2760 | PIXMAN_IMAGE_GET_LINE ( | |||
2761 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
2762 | ||||
2763 | mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); | |||
2764 | ||||
2765 | xmm_mask = create_mask_16_128 (mask >> 24); | |||
2766 | ||||
2767 | while (height--) | |||
2768 | { | |||
2769 | dst = dst_line; | |||
2770 | dst_line += dst_stride; | |||
2771 | src = src_line; | |||
2772 | src_line += src_stride; | |||
2773 | w = width; | |||
2774 | ||||
2775 | while (w && (uintptr_t)dst & 15) | |||
2776 | { | |||
2777 | uint32_t s = *src++; | |||
2778 | ||||
2779 | if (s) | |||
2780 | { | |||
2781 | uint32_t d = *dst; | |||
2782 | ||||
2783 | __m128i ms = unpack_32_1x128 (s); | |||
2784 | __m128i alpha = expand_alpha_1x128 (ms); | |||
2785 | __m128i dest = xmm_mask; | |||
2786 | __m128i alpha_dst = unpack_32_1x128 (d); | |||
2787 | ||||
2788 | *dst = pack_1x128_32 ( | |||
2789 | in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); | |||
2790 | } | |||
2791 | dst++; | |||
2792 | w--; | |||
2793 | } | |||
2794 | ||||
2795 | while (w >= 4) | |||
2796 | { | |||
2797 | xmm_src = load_128_unaligned ((__m128i*)src); | |||
2798 | ||||
2799 | if (!is_zero (xmm_src)) | |||
2800 | { | |||
2801 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
2802 | ||||
2803 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
2804 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
2805 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
2806 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
2807 | ||||
2808 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
2809 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
2810 | &xmm_mask, &xmm_mask, | |||
2811 | &xmm_dst_lo, &xmm_dst_hi); | |||
2812 | ||||
2813 | save_128_aligned ( | |||
2814 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
2815 | } | |||
2816 | ||||
2817 | dst += 4; | |||
2818 | src += 4; | |||
2819 | w -= 4; | |||
2820 | } | |||
2821 | ||||
2822 | while (w) | |||
2823 | { | |||
2824 | uint32_t s = *src++; | |||
2825 | ||||
2826 | if (s) | |||
2827 | { | |||
2828 | uint32_t d = *dst; | |||
2829 | ||||
2830 | __m128i ms = unpack_32_1x128 (s); | |||
2831 | __m128i alpha = expand_alpha_1x128 (ms); | |||
2832 | __m128i mask = xmm_mask; | |||
2833 | __m128i dest = unpack_32_1x128 (d); | |||
2834 | ||||
2835 | *dst = pack_1x128_32 ( | |||
2836 | in_over_1x128 (&ms, &alpha, &mask, &dest)); | |||
2837 | } | |||
2838 | ||||
2839 | dst++; | |||
2840 | w--; | |||
2841 | } | |||
2842 | } | |||
2843 | ||||
2844 | } | |||
2845 | ||||
2846 | static void | |||
2847 | sse2_composite_src_x888_0565 (pixman_implementation_t *imp, | |||
2848 | pixman_composite_info_t *info) | |||
2849 | { | |||
2850 | PIXMAN_COMPOSITE_ARGS (info); | |||
2851 | uint16_t *dst_line, *dst; | |||
2852 | uint32_t *src_line, *src, s; | |||
2853 | int dst_stride, src_stride; | |||
2854 | int32_t w; | |||
2855 | ||||
2856 | PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
2857 | PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); | |||
2858 | ||||
2859 | while (height--) | |||
2860 | { | |||
2861 | dst = dst_line; | |||
2862 | dst_line += dst_stride; | |||
2863 | src = src_line; | |||
2864 | src_line += src_stride; | |||
2865 | w = width; | |||
2866 | ||||
2867 | while (w && (uintptr_t)dst & 15) | |||
2868 | { | |||
2869 | s = *src++; | |||
2870 | *dst = convert_8888_to_0565 (s); | |||
2871 | dst++; | |||
2872 | w--; | |||
2873 | } | |||
2874 | ||||
2875 | while (w >= 8) | |||
2876 | { | |||
2877 | __m128i xmm_src0 = load_128_unaligned ((__m128i *)src + 0); | |||
2878 | __m128i xmm_src1 = load_128_unaligned ((__m128i *)src + 1); | |||
2879 | ||||
2880 | save_128_aligned ((__m128i*)dst, pack_565_2packedx128_128 (xmm_src0, xmm_src1)); | |||
2881 | ||||
2882 | w -= 8; | |||
2883 | src += 8; | |||
2884 | dst += 8; | |||
2885 | } | |||
2886 | ||||
2887 | while (w) | |||
2888 | { | |||
2889 | s = *src++; | |||
2890 | *dst = convert_8888_to_0565 (s); | |||
2891 | dst++; | |||
2892 | w--; | |||
2893 | } | |||
2894 | } | |||
2895 | } | |||
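/* Editor's note: this SRC path converts 8 pixels per vector iteration:
 * two unaligned 128-bit x8r8g8b8 loads (4 pixels each) pack into a
 * single aligned 128-bit store of 8 r5g6b5 pixels. */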
2896 | ||||
2897 | static void | |||
2898 | sse2_composite_src_x888_8888 (pixman_implementation_t *imp, | |||
2899 | pixman_composite_info_t *info) | |||
2900 | { | |||
2901 | PIXMAN_COMPOSITE_ARGS (info); | |||
2902 | uint32_t *dst_line, *dst; | |||
2903 | uint32_t *src_line, *src; | |||
2904 | int32_t w; | |||
2905 | int dst_stride, src_stride; | |||
2906 | ||||
2907 | ||||
2908 | PIXMAN_IMAGE_GET_LINE ( | |||
2909 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
2910 | PIXMAN_IMAGE_GET_LINE ( | |||
2911 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
2912 | ||||
2913 | while (height--) | |||
2914 | { | |||
2915 | dst = dst_line; | |||
2916 | dst_line += dst_stride; | |||
2917 | src = src_line; | |||
2918 | src_line += src_stride; | |||
2919 | w = width; | |||
2920 | ||||
2921 | while (w && (uintptr_t)dst & 15) | |||
2922 | { | |||
2923 | *dst++ = *src++ | 0xff000000; | |||
2924 | w--; | |||
2925 | } | |||
2926 | ||||
2927 | while (w >= 16) | |||
2928 | { | |||
2929 | __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4; | |||
2930 | ||||
2931 | xmm_src1 = load_128_unaligned ((__m128i*)src + 0); | |||
2932 | xmm_src2 = load_128_unaligned ((__m128i*)src + 1); | |||
2933 | xmm_src3 = load_128_unaligned ((__m128i*)src + 2); | |||
2934 | xmm_src4 = load_128_unaligned ((__m128i*)src + 3); | |||
2935 | ||||
2936 | save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000)); | |||
2937 | save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000)); | |||
2938 | save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000)); | |||
2939 | save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000)); | |||
2940 | ||||
2941 | dst += 16; | |||
2942 | src += 16; | |||
2943 | w -= 16; | |||
2944 | } | |||
2945 | ||||
2946 | while (w) | |||
2947 | { | |||
2948 | *dst++ = *src++ | 0xff000000; | |||
2949 | w--; | |||
2950 | } | |||
2951 | } | |||
2952 | ||||
2953 | } | |||
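/* Editor's note: an x8r8g8b8 source carries an undefined alpha byte,
 * so SRC into a8r8g8b8 reduces to a copy that forces alpha opaque; the
 * scalar loops OR in 0xff000000 and the vector loop ORs mask_ff000000
 * across four registers (16 pixels) per iteration. */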
2954 | ||||
2955 | static void | |||
2956 | sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp, | |||
2957 | pixman_composite_info_t *info) | |||
2958 | { | |||
2959 | PIXMAN_COMPOSITE_ARGS (info); | |||
2960 | uint32_t *dst_line, *dst; | |||
2961 | uint32_t *src_line, *src; | |||
2962 | uint32_t mask; | |||
2963 | int dst_stride, src_stride; | |||
2964 | int32_t w; | |||
2965 | ||||
2966 | __m128i xmm_mask, xmm_alpha; | |||
2967 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | |||
2968 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
2969 | ||||
2970 | PIXMAN_IMAGE_GET_LINE ( | |||
2971 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
2972 | PIXMAN_IMAGE_GET_LINE ( | |||
2973 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
2974 | ||||
2975 | mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); | |||
2976 | ||||
2977 | xmm_mask = create_mask_16_128 (mask >> 24); | |||
2978 | xmm_alpha = mask_00ff; | |||
2979 | ||||
2980 | while (height--) | |||
2981 | { | |||
2982 | dst = dst_line; | |||
2983 | dst_line += dst_stride; | |||
2984 | src = src_line; | |||
2985 | src_line += src_stride; | |||
2986 | w = width; | |||
2987 | ||||
2988 | while (w && (uintptr_t)dst & 15) | |||
2989 | { | |||
2990 | uint32_t s = (*src++) | 0xff000000; | |||
2991 | uint32_t d = *dst; | |||
2992 | ||||
2993 | __m128i src = unpack_32_1x128 (s); | |||
2994 | __m128i alpha = xmm_alpha; | |||
2995 | __m128i mask = xmm_mask; | |||
2996 | __m128i dest = unpack_32_1x128 (d); | |||
2997 | ||||
2998 | *dst++ = pack_1x128_32 ( | |||
2999 | in_over_1x128 (&src, &alpha, &mask, &dest)); | |||
3000 | ||||
3001 | w--; | |||
3002 | } | |||
3003 | ||||
3004 | while (w >= 4) | |||
3005 | { | |||
3006 | xmm_src = _mm_or_si128 ( | |||
3007 | load_128_unaligned ((__m128i*)src), mask_ff000000); | |||
3008 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
3009 | ||||
3010 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
3011 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
3012 | ||||
3013 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
3014 | &xmm_alpha, &xmm_alpha, | |||
3015 | &xmm_mask, &xmm_mask, | |||
3016 | &xmm_dst_lo, &xmm_dst_hi); | |||
3017 | ||||
3018 | save_128_aligned ( | |||
3019 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
3020 | ||||
3021 | dst += 4; | |||
3022 | src += 4; | |||
3023 | w -= 4; | |||
3024 | ||||
3025 | } | |||
3026 | ||||
3027 | while (w) | |||
3028 | { | |||
3029 | uint32_t s = (*src++) | 0xff000000; | |||
3030 | uint32_t d = *dst; | |||
3031 | ||||
3032 | __m128i src = unpack_32_1x128 (s); | |||
3033 | __m128i alpha = xmm_alpha; | |||
3034 | __m128i mask = xmm_mask; | |||
3035 | __m128i dest = unpack_32_1x128 (d); | |||
3036 | ||||
3037 | *dst++ = pack_1x128_32 ( | |||
3038 | in_over_1x128 (&src, &alpha, &mask, &dest)); | |||
3039 | ||||
3040 | w--; | |||
3041 | } | |||
3042 | } | |||
3043 | ||||
3044 | } | |||
3045 | ||||
3046 | static void | |||
3047 | sse2_composite_over_8888_8888 (pixman_implementation_t *imp, | |||
3048 | pixman_composite_info_t *info) | |||
3049 | { | |||
3050 | PIXMAN_COMPOSITE_ARGS (info); | |||
3051 | int dst_stride, src_stride; | |||
3052 | uint32_t *dst_line, *dst; | |||
3053 | uint32_t *src_line, *src; | |||
3054 | ||||
3055 | PIXMAN_IMAGE_GET_LINE ( | |||
3056 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
3057 | PIXMAN_IMAGE_GET_LINE ( | |||
3058 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
3059 | ||||
3060 | dst = dst_line; | |||
3061 | src = src_line; | |||
3062 | ||||
3063 | while (height--) | |||
3064 | { | |||
3065 | sse2_combine_over_u (imp, op, dst, src, NULL, width); | |||
3066 | ||||
3067 | dst += dst_stride; | |||
3068 | src += src_stride; | |||
3069 | } | |||
3070 | } | |||
3071 | ||||
3072 | static force_inline uint16_t | |||
3073 | composite_over_8888_0565pixel (uint32_t src, uint16_t dst) | |||
3074 | { | |||
3075 | __m128i ms; | |||
3076 | ||||
3077 | ms = unpack_32_1x128 (src); | |||
3078 | return pack_565_32_16 ( | |||
3079 | pack_1x128_32 ( | |||
3080 | over_1x128 ( | |||
3081 | ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst)))); | |||
3082 | } | |||
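/* Editor's note: composite_over_8888_0565pixel applies the Porter-Duff OVER
 * operator to one pixel: the r5g6b5 destination is expanded to 8-bit
 * channels, combined as d = s + d * (255 - alpha(s)) / 255 per channel (the
 * source is premultiplied), and packed back to 565.  A plain scalar sketch
 * of the same idea follows; it uses an ordinary /255 and bit-replication for
 * the 565 expansion, so it is not guaranteed to be bit-exact with pixman's
 * rounding.  Illustrative only, not part of pixman.
 */
static uint16_t
over_8888_0565_scalar_sketch (uint32_t src, uint16_t dst)
{
    uint32_t sa = src >> 24;
    uint32_t sr = (src >> 16) & 0xff;
    uint32_t sg = (src >> 8) & 0xff;
    uint32_t sb = src & 0xff;

    /* expand r5g6b5 to 8 bits per channel by replicating the high bits */
    uint32_t dr = (dst >> 11) & 0x1f;  dr = (dr << 3) | (dr >> 2);
    uint32_t dg = (dst >> 5) & 0x3f;   dg = (dg << 2) | (dg >> 4);
    uint32_t db = dst & 0x1f;          db = (db << 3) | (db >> 2);

    /* premultiplied OVER: d = s + d * (255 - sa) / 255 */
    dr = sr + dr * (255 - sa) / 255;
    dg = sg + dg * (255 - sa) / 255;
    db = sb + db * (255 - sa) / 255;

    /* pack back to r5g6b5 */
    return (uint16_t) (((dr & 0xf8) << 8) | ((dg & 0xfc) << 3) | (db >> 3));
}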
3083 | ||||
3084 | static void | |||
3085 | sse2_composite_over_8888_0565 (pixman_implementation_t *imp, | |||
3086 | pixman_composite_info_t *info) | |||
3087 | { | |||
3088 | PIXMAN_COMPOSITE_ARGS (info); | |||
3089 | uint16_t *dst_line, *dst, d; | |||
3090 | uint32_t *src_line, *src, s; | |||
3091 | int dst_stride, src_stride; | |||
3092 | int32_t w; | |||
3093 | ||||
3094 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
3095 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | |||
3096 | __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; | |||
3097 | ||||
3098 | PIXMAN_IMAGE_GET_LINE ( | |||
3099 | dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); | |||
3100 | PIXMAN_IMAGE_GET_LINE ( | |||
3101 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
3102 | ||||
3103 | while (height--) | |||
3104 | { | |||
3105 | dst = dst_line; | |||
3106 | src = src_line; | |||
3107 | ||||
3108 | dst_line += dst_stride; | |||
3109 | src_line += src_stride; | |||
3110 | w = width; | |||
3111 | ||||
3112 | /* Align dst on a 16-byte boundary */ | |||
3113 | while (w && | |||
3114 | ((uintptr_t)dst & 15)) | |||
3115 | { | |||
3116 | s = *src++; | |||
3117 | d = *dst; | |||
3118 | ||||
3119 | *dst++ = composite_over_8888_0565pixel (s, d); | |||
3120 | w--; | |||
3121 | } | |||
3122 | ||||
3123 | /* It's an 8 pixel loop */ | |||
3124 | while (w >= 8) | |||
3125 | { | |||
3126 | /* I'm loading unaligned because I'm not sure | |||
3127 | * about the address alignment. | |||
3128 | */ | |||
3129 | xmm_src = load_128_unaligned ((__m128i*) src); | |||
3130 | xmm_dst = load_128_aligned ((__m128i*) dst); | |||
3131 | ||||
3132 | /* Unpacking */ | |||
3133 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
3134 | unpack_565_128_4x128 (xmm_dst, | |||
3135 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | |||
3136 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
3137 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
3138 | ||||
3139 | /* I'm loading the next 4 pixels from memory | |||
3140 | * ahead of time to optimize the memory read. | |||
3141 | */ | |||
3142 | xmm_src = load_128_unaligned ((__m128i*) (src + 4)); | |||
3143 | ||||
3144 | over_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
3145 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
3146 | &xmm_dst0, &xmm_dst1); | |||
3147 | ||||
3148 | /* Unpacking */ | |||
3149 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
3150 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
3151 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
3152 | ||||
3153 | over_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
3154 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
3155 | &xmm_dst2, &xmm_dst3); | |||
3156 | ||||
3157 | save_128_aligned ( | |||
3158 | (__m128i*)dst, pack_565_4x128_128 ( | |||
3159 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); | |||
3160 | ||||
3161 | w -= 8; | |||
3162 | dst += 8; | |||
3163 | src += 8; | |||
3164 | } | |||
3165 | ||||
3166 | while (w--) | |||
3167 | { | |||
3168 | s = *src++; | |||
3169 | d = *dst; | |||
3170 | ||||
3171 | *dst++ = composite_over_8888_0565pixel (s, d); | |||
3172 | } | |||
3173 | } | |||
3174 | ||||
3175 | } | |||
3176 | ||||
3177 | static void | |||
3178 | sse2_composite_over_n_8_8888 (pixman_implementation_t *imp, | |||
3179 | pixman_composite_info_t *info) | |||
3180 | { | |||
3181 | PIXMAN_COMPOSITE_ARGS (info); | |||
3182 | uint32_t src, srca; | |||
3183 | uint32_t *dst_line, *dst; | |||
3184 | uint8_t *mask_line, *mask; | |||
3185 | int dst_stride, mask_stride; | |||
3186 | int32_t w; | |||
3187 | uint32_t d; | |||
3188 | ||||
3189 | __m128i xmm_src, xmm_alpha, xmm_def; | |||
3190 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
3191 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
3192 | ||||
3193 | __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; | |||
3194 | ||||
3195 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
3196 | ||||
3197 | srca = src >> 24; | |||
3198 | if (src == 0) | |||
3199 | return; | |||
3200 | ||||
3201 | PIXMAN_IMAGE_GET_LINE ( | |||
3202 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
3203 | PIXMAN_IMAGE_GET_LINE ( | |||
3204 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); | |||
3205 | ||||
3206 | xmm_def = create_mask_2x32_128 (src, src); | |||
3207 | xmm_src = expand_pixel_32_1x128 (src); | |||
3208 | xmm_alpha = expand_alpha_1x128 (xmm_src); | |||
3209 | mmx_src = xmm_src; | |||
3210 | mmx_alpha = xmm_alpha; | |||
3211 | ||||
3212 | while (height--) | |||
3213 | { | |||
3214 | dst = dst_line; | |||
3215 | dst_line += dst_stride; | |||
3216 | mask = mask_line; | |||
3217 | mask_line += mask_stride; | |||
3218 | w = width; | |||
3219 | ||||
3220 | while (w && (uintptr_t)dst & 15) | |||
3221 | { | |||
3222 | uint8_t m = *mask++; | |||
3223 | ||||
3224 | if (m) | |||
3225 | { | |||
3226 | d = *dst; | |||
3227 | mmx_mask = expand_pixel_8_1x128 (m); | |||
3228 | mmx_dest = unpack_32_1x128 (d); | |||
3229 | ||||
3230 | *dst = pack_1x128_32 (in_over_1x128 (&mmx_src, | |||
3231 | &mmx_alpha, | |||
3232 | &mmx_mask, | |||
3233 | &mmx_dest)); | |||
3234 | } | |||
3235 | ||||
3236 | w--; | |||
3237 | dst++; | |||
3238 | } | |||
3239 | ||||
3240 | while (w >= 4) | |||
3241 | { | |||
3242 | uint32_t m; | |||
3243 | memcpy(&m, mask, sizeof(uint32_t)); | |||
3244 | ||||
3245 | if (srca == 0xff && m == 0xffffffff) | |||
3246 | { | |||
3247 | save_128_aligned ((__m128i*)dst, xmm_def); | |||
3248 | } | |||
3249 | else if (m) | |||
3250 | { | |||
3251 | xmm_dst = load_128_aligned ((__m128i*) dst); | |||
3252 | xmm_mask = unpack_32_1x128 (m); | |||
3253 | xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); | |||
3254 | ||||
3255 | /* Unpacking */ | |||
3256 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
3257 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
3258 | ||||
3259 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, | |||
3260 | &xmm_mask_lo, &xmm_mask_hi); | |||
3261 | ||||
3262 | in_over_2x128 (&xmm_src, &xmm_src, | |||
3263 | &xmm_alpha, &xmm_alpha, | |||
3264 | &xmm_mask_lo, &xmm_mask_hi, | |||
3265 | &xmm_dst_lo, &xmm_dst_hi); | |||
3266 | ||||
3267 | save_128_aligned ( | |||
3268 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
3269 | } | |||
3270 | ||||
3271 | w -= 4; | |||
3272 | dst += 4; | |||
3273 | mask += 4; | |||
3274 | } | |||
3275 | ||||
3276 | while (w) | |||
3277 | { | |||
3278 | uint8_t m = *mask++; | |||
3279 | ||||
3280 | if (m) | |||
3281 | { | |||
3282 | d = *dst; | |||
3283 | mmx_mask = expand_pixel_8_1x128 (m); | |||
3284 | mmx_dest = unpack_32_1x128 (d); | |||
3285 | ||||
3286 | *dst = pack_1x128_32 (in_over_1x128 (&mmx_src, | |||
3287 | &mmx_alpha, | |||
3288 | &mmx_mask, | |||
3289 | &mmx_dest)); | |||
3290 | } | |||
3291 | ||||
3292 | w--; | |||
3293 | dst++; | |||
3294 | } | |||
3295 | } | |||
3296 | ||||
3297 | } | |||
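/* Editor's note: sse2_composite_over_n_8_8888 composites a solid a8r8g8b8
 * source through an a8 mask, i.e. each destination pixel becomes
 * (src IN mask) OVER dest.  The 4-pixel loop above also short-circuits two
 * common cases visible in the code: an all-zero mask word leaves dst
 * untouched, and an opaque source with an all-0xff mask word stores the
 * solid color directly.  A scalar sketch of the per-pixel math follows
 * (plain /255, so not bit-exact with pixman's rounding; helper name is
 * mine, illustrative only):
 */
static uint32_t
in_over_solid_scalar_sketch (uint32_t src, uint8_t m, uint32_t dst)
{
    uint32_t result = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
	uint32_t s = (src >> shift) & 0xff;
	uint32_t d = (dst >> shift) & 0xff;
	uint32_t sa = src >> 24;

	s = s * m / 255;                  /* IN: scale source channel by mask */
	sa = sa * m / 255;                /* masked source alpha */
	d = s + d * (255 - sa) / 255;     /* OVER */

	result |= d << shift;
    }
    return result;
}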
3298 | ||||
3299 | #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) | |||
3300 | __attribute__((__force_align_arg_pointer__)) | |||
3301 | #endif | |||
3302 | static pixman_bool_t | |||
3303 | sse2_fill (pixman_implementation_t *imp, | |||
3304 | uint32_t * bits, | |||
3305 | int stride, | |||
3306 | int bpp, | |||
3307 | int x, | |||
3308 | int y, | |||
3309 | int width, | |||
3310 | int height, | |||
3311 | uint32_t filler) | |||
3312 | { | |||
3313 | uint32_t byte_width; | |||
3314 | uint8_t *byte_line; | |||
3315 | ||||
3316 | __m128i xmm_def; | |||
3317 | ||||
3318 | if (bpp == 8) | |||
3319 | { | |||
3320 | uint32_t b; | |||
3321 | uint32_t w; | |||
3322 | ||||
3323 | stride = stride * (int) sizeof (uint32_t) / 1; | |||
3324 | byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); | |||
3325 | byte_width = width; | |||
3326 | stride *= 1; | |||
3327 | ||||
3328 | b = filler & 0xff; | |||
3329 | w = (b << 8) | b; | |||
3330 | filler = (w << 16) | w; | |||
3331 | } | |||
3332 | else if (bpp == 16) | |||
3333 | { | |||
3334 | stride = stride * (int) sizeof (uint32_t) / 2; | |||
3335 | byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); | |||
3336 | byte_width = 2 * width; | |||
3337 | stride *= 2; | |||
3338 | ||||
3339 | filler = (filler & 0xffff) * 0x00010001; | |||
3340 | } | |||
3341 | else if (bpp == 32) | |||
3342 | { | |||
3343 | stride = stride * (int) sizeof (uint32_t) / 4; | |||
3344 | byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); | |||
3345 | byte_width = 4 * width; | |||
3346 | stride *= 4; | |||
3347 | } | |||
3348 | else | |||
3349 | { | |||
3350 | return FALSE; | |||
3351 | } | |||
3352 | ||||
3353 | xmm_def = create_mask_2x32_128 (filler, filler); | |||
3354 | ||||
3355 | while (height--) | |||
3356 | { | |||
3357 | int w; | |||
3358 | uint8_t *d = byte_line; | |||
3359 | byte_line += stride; | |||
3360 | w = byte_width; | |||
3361 | ||||
3362 | if (w >= 1 && ((uintptr_t)d & 1)) | |||
3363 | { | |||
3364 | *(uint8_t *)d = filler; | |||
3365 | w -= 1; | |||
3366 | d += 1; | |||
3367 | } | |||
3368 | ||||
3369 | while (w >= 2 && ((uintptr_t)d & 3)) | |||
3370 | { | |||
3371 | *(uint16_t *)d = filler; | |||
3372 | w -= 2; | |||
3373 | d += 2; | |||
3374 | } | |||
3375 | ||||
3376 | while (w >= 4 && ((uintptr_t)d & 15)) | |||
3377 | { | |||
3378 | *(uint32_t *)d = filler; | |||
3379 | ||||
3380 | w -= 4; | |||
3381 | d += 4; | |||
3382 | } | |||
3383 | ||||
3384 | while (w >= 128) | |||
3385 | { | |||
3386 | save_128_aligned ((__m128i*)(d), xmm_def); | |||
3387 | save_128_aligned ((__m128i*)(d + 16), xmm_def); | |||
3388 | save_128_aligned ((__m128i*)(d + 32), xmm_def); | |||
3389 | save_128_aligned ((__m128i*)(d + 48), xmm_def); | |||
3390 | save_128_aligned ((__m128i*)(d + 64), xmm_def); | |||
3391 | save_128_aligned ((__m128i*)(d + 80), xmm_def); | |||
3392 | save_128_aligned ((__m128i*)(d + 96), xmm_def); | |||
3393 | save_128_aligned ((__m128i*)(d + 112), xmm_def); | |||
3394 | ||||
3395 | d += 128; | |||
3396 | w -= 128; | |||
3397 | } | |||
3398 | ||||
3399 | if (w >= 64) | |||
3400 | { | |||
3401 | save_128_aligned ((__m128i*)(d), xmm_def); | |||
3402 | save_128_aligned ((__m128i*)(d + 16), xmm_def); | |||
3403 | save_128_aligned ((__m128i*)(d + 32), xmm_def); | |||
3404 | save_128_aligned ((__m128i*)(d + 48), xmm_def); | |||
3405 | ||||
3406 | d += 64; | |||
3407 | w -= 64; | |||
3408 | } | |||
3409 | ||||
3410 | if (w >= 32) | |||
3411 | { | |||
3412 | save_128_aligned ((__m128i*)(d), xmm_def); | |||
3413 | save_128_aligned ((__m128i*)(d + 16), xmm_def); | |||
3414 | ||||
3415 | d += 32; | |||
3416 | w -= 32; | |||
3417 | } | |||
3418 | ||||
3419 | if (w >= 16) | |||
3420 | { | |||
3421 | save_128_aligned ((__m128i*)(d), xmm_def); | |||
3422 | ||||
3423 | d += 16; | |||
3424 | w -= 16; | |||
3425 | } | |||
3426 | ||||
3427 | while (w >= 4) | |||
3428 | { | |||
3429 | *(uint32_t *)d = filler; | |||
3430 | ||||
3431 | w -= 4; | |||
3432 | d += 4; | |||
3433 | } | |||
3434 | ||||
3435 | if (w >= 2) | |||
3436 | { | |||
3437 | *(uint16_t *)d = filler; | |||
3438 | w -= 2; | |||
3439 | d += 2; | |||
3440 | } | |||
3441 | ||||
3442 | if (w >= 1) | |||
3443 | { | |||
3444 | *(uint8_t *)d = filler; | |||
3445 | w -= 1; | |||
3446 | d += 1; | |||
3447 | } | |||
3448 | } | |||
3449 | ||||
3450 | return TRUE; | |||
3451 | } | |||
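/* Editor's note: sse2_fill first widens the filler so that a single 32-bit
 * value (and hence the 128-bit register holding four copies of it) carries
 * the pattern for any supported bpp: a byte is replicated four times, a
 * 16-bit value twice, a 32-bit value is used as-is.  The row loop then
 * writes in decreasing power-of-two chunks while tracking alignment.  A
 * small sketch of just the replication step (hypothetical helper name, not
 * part of pixman):
 */
static uint32_t
replicate_filler_sketch (uint32_t filler, int bpp)
{
    if (bpp == 8)
    {
	filler &= 0xff;
	filler |= filler << 8;      /* 0x000000ab -> 0x0000abab */
	filler |= filler << 16;     /* 0x0000abab -> 0xabababab */
    }
    else if (bpp == 16)
    {
	filler &= 0xffff;
	filler |= filler << 16;     /* 0x0000abcd -> 0xabcdabcd */
    }
    /* bpp == 32: already a full 32-bit pattern */
    return filler;
}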
3452 | ||||
3453 | static void | |||
3454 | sse2_composite_src_n_8_8888 (pixman_implementation_t *imp, | |||
3455 | pixman_composite_info_t *info) | |||
3456 | { | |||
3457 | PIXMAN_COMPOSITE_ARGS (info); | |||
3458 | uint32_t src, srca; | |||
3459 | uint32_t *dst_line, *dst; | |||
3460 | uint8_t *mask_line, *mask; | |||
3461 | int dst_stride, mask_stride; | |||
3462 | int32_t w; | |||
3463 | ||||
3464 | __m128i xmm_src, xmm_def; | |||
3465 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
3466 | ||||
3467 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
3468 | ||||
3469 | srca = src >> 24; | |||
3470 | if (src == 0) | |||
3471 | { | |||
3472 | sse2_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride, | |||
3473 | PIXMAN_FORMAT_BPP (dest_image->bits.format), | |||
3474 | dest_x, dest_y, width, height, 0); | |||
3475 | return; | |||
3476 | } | |||
3477 | ||||
3478 | PIXMAN_IMAGE_GET_LINE ( | |||
3479 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
3480 | PIXMAN_IMAGE_GET_LINE ( | |||
3481 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); | |||
3482 | ||||
3483 | xmm_def = create_mask_2x32_128 (src, src); | |||
3484 | xmm_src = expand_pixel_32_1x128 (src); | |||
3485 | ||||
3486 | while (height--) | |||
3487 | { | |||
3488 | dst = dst_line; | |||
3489 | dst_line += dst_stride; | |||
3490 | mask = mask_line; | |||
3491 | mask_line += mask_stride; | |||
3492 | w = width; | |||
3493 | ||||
3494 | while (w && (uintptr_t)dst & 15) | |||
3495 | { | |||
3496 | uint8_t m = *mask++; | |||
3497 | ||||
3498 | if (m) | |||
3499 | { | |||
3500 | *dst = pack_1x128_32 ( | |||
3501 | pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m))); | |||
3502 | } | |||
3503 | else | |||
3504 | { | |||
3505 | *dst = 0; | |||
3506 | } | |||
3507 | ||||
3508 | w--; | |||
3509 | dst++; | |||
3510 | } | |||
3511 | ||||
3512 | while (w >= 4) | |||
3513 | { | |||
3514 | uint32_t m; | |||
3515 | memcpy(&m, mask, sizeof(uint32_t)); | |||
3516 | ||||
3517 | if (srca == 0xff && m == 0xffffffff) | |||
3518 | { | |||
3519 | save_128_aligned ((__m128i*)dst, xmm_def); | |||
3520 | } | |||
3521 | else if (m) | |||
3522 | { | |||
3523 | xmm_mask = unpack_32_1x128 (m); | |||
3524 | xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); | |||
3525 | ||||
3526 | /* Unpacking */ | |||
3527 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
3528 | ||||
3529 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, | |||
3530 | &xmm_mask_lo, &xmm_mask_hi); | |||
3531 | ||||
3532 | pix_multiply_2x128 (&xmm_src, &xmm_src, | |||
3533 | &xmm_mask_lo, &xmm_mask_hi, | |||
3534 | &xmm_mask_lo, &xmm_mask_hi); | |||
3535 | ||||
3536 | save_128_aligned ( | |||
3537 | (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); | |||
3538 | } | |||
3539 | else | |||
3540 | { | |||
3541 | save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ()); | |||
3542 | } | |||
3543 | ||||
3544 | w -= 4; | |||
3545 | dst += 4; | |||
3546 | mask += 4; | |||
3547 | } | |||
3548 | ||||
3549 | while (w) | |||
3550 | { | |||
3551 | uint8_t m = *mask++; | |||
3552 | ||||
3553 | if (m) | |||
3554 | { | |||
3555 | *dst = pack_1x128_32 ( | |||
3556 | pix_multiply_1x128 ( | |||
3557 | xmm_src, expand_pixel_8_1x128 (m))); | |||
3558 | } | |||
3559 | else | |||
3560 | { | |||
3561 | *dst = 0; | |||
3562 | } | |||
3563 | ||||
3564 | w--; | |||
3565 | dst++; | |||
3566 | } | |||
3567 | } | |||
3568 | ||||
3569 | } | |||
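/* Editor's note: unlike the OVER paths, sse2_composite_src_n_8_8888 is the
 * SRC operator, so the destination is not blended with at all: each output
 * pixel is the solid color scaled by the a8 mask, and a zero mask byte
 * writes 0 (the 4-pixel loop above also stores the solid color directly
 * when the source is opaque and the mask word is all 0xff).  A scalar
 * sketch of the per-pixel result (plain /255, helper name mine,
 * illustrative only):
 */
static uint32_t
src_n_8_scalar_sketch (uint32_t src, uint8_t m)
{
    uint32_t result = 0;
    int shift;

    if (m == 0)
	return 0;

    for (shift = 0; shift < 32; shift += 8)
    {
	uint32_t s = (src >> shift) & 0xff;

	result |= (s * m / 255) << shift;   /* scale every channel, alpha included */
    }
    return result;
}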
3570 | ||||
3571 | static void | |||
3572 | sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, | |||
3573 | pixman_composite_info_t *info) | |||
3574 | { | |||
3575 | PIXMAN_COMPOSITE_ARGS (info); | |||
3576 | uint32_t src; | |||
3577 | uint16_t *dst_line, *dst, d; | |||
3578 | uint8_t *mask_line, *mask; | |||
3579 | int dst_stride, mask_stride; | |||
3580 | int32_t w; | |||
3581 | __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; | |||
3582 | ||||
3583 | __m128i xmm_src, xmm_alpha; | |||
3584 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
3585 | __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; | |||
3586 | ||||
3587 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
3588 | ||||
3589 | if (src == 0) | |||
3590 | return; | |||
3591 | ||||
3592 | PIXMAN_IMAGE_GET_LINE ( | |||
3593 | dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); | |||
3594 | PIXMAN_IMAGE_GET_LINE ( | |||
3595 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); | |||
3596 | ||||
3597 | xmm_src = expand_pixel_32_1x128 (src); | |||
3598 | xmm_alpha = expand_alpha_1x128 (xmm_src); | |||
3599 | mmx_src = xmm_src; | |||
3600 | mmx_alpha = xmm_alpha; | |||
3601 | ||||
3602 | while (height--) | |||
3603 | { | |||
3604 | dst = dst_line; | |||
3605 | dst_line += dst_stride; | |||
3606 | mask = mask_line; | |||
3607 | mask_line += mask_stride; | |||
3608 | w = width; | |||
3609 | ||||
3610 | while (w && (uintptr_t)dst & 15) | |||
3611 | { | |||
3612 | uint8_t m = *mask++; | |||
3613 | ||||
3614 | if (m) | |||
3615 | { | |||
3616 | d = *dst; | |||
3617 | mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); | |||
3618 | mmx_dest = expand565_16_1x128 (d); | |||
3619 | ||||
3620 | *dst = pack_565_32_16 ( | |||
3621 | pack_1x128_32 ( | |||
3622 | in_over_1x128 ( | |||
3623 | &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); | |||
3624 | } | |||
3625 | ||||
3626 | w--; | |||
3627 | dst++; | |||
3628 | } | |||
3629 | ||||
3630 | while (w >= 8) | |||
3631 | { | |||
3632 | uint32_t m; | |||
3633 | ||||
3634 | xmm_dst = load_128_aligned ((__m128i*) dst); | |||
3635 | unpack_565_128_4x128 (xmm_dst, | |||
3636 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | |||
3637 | ||||
3638 | memcpy(&m, mask, sizeof(uint32_t)); | |||
3639 | mask += 4; | |||
3640 | ||||
3641 | if (m) | |||
3642 | { | |||
3643 | xmm_mask = unpack_32_1x128 (m); | |||
3644 | xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); | |||
3645 | ||||
3646 | /* Unpacking */ | |||
3647 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
3648 | ||||
3649 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, | |||
3650 | &xmm_mask_lo, &xmm_mask_hi); | |||
3651 | ||||
3652 | in_over_2x128 (&xmm_src, &xmm_src, | |||
3653 | &xmm_alpha, &xmm_alpha, | |||
3654 | &xmm_mask_lo, &xmm_mask_hi, | |||
3655 | &xmm_dst0, &xmm_dst1); | |||
3656 | } | |||
3657 | ||||
3658 | memcpy(&m, mask, sizeof(uint32_t)); | |||
3659 | mask += 4; | |||
3660 | ||||
3661 | if (m) | |||
3662 | { | |||
3663 | xmm_mask = unpack_32_1x128 (m); | |||
3664 | xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); | |||
3665 | ||||
3666 | /* Unpacking */ | |||
3667 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
3668 | ||||
3669 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, | |||
3670 | &xmm_mask_lo, &xmm_mask_hi); | |||
3671 | in_over_2x128 (&xmm_src, &xmm_src, | |||
3672 | &xmm_alpha, &xmm_alpha, | |||
3673 | &xmm_mask_lo, &xmm_mask_hi, | |||
3674 | &xmm_dst2, &xmm_dst3); | |||
3675 | } | |||
3676 | ||||
3677 | save_128_aligned ( | |||
3678 | (__m128i*)dst, pack_565_4x128_128 ( | |||
3679 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); | |||
3680 | ||||
3681 | w -= 8; | |||
3682 | dst += 8; | |||
3683 | } | |||
3684 | ||||
3685 | while (w) | |||
3686 | { | |||
3687 | uint8_t m = *mask++; | |||
3688 | ||||
3689 | if (m) | |||
3690 | { | |||
3691 | d = *dst; | |||
3692 | mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); | |||
3693 | mmx_dest = expand565_16_1x128 (d); | |||
3694 | ||||
3695 | *dst = pack_565_32_16 ( | |||
3696 | pack_1x128_32 ( | |||
3697 | in_over_1x128 ( | |||
3698 | &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); | |||
3699 | } | |||
3700 | ||||
3701 | w--; | |||
3702 | dst++; | |||
3703 | } | |||
3704 | } | |||
3705 | ||||
3706 | } | |||
3707 | ||||
3708 | static void | |||
3709 | sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp, | |||
3710 | pixman_composite_info_t *info) | |||
3711 | { | |||
3712 | PIXMAN_COMPOSITE_ARGS (info); | |||
3713 | uint16_t *dst_line, *dst, d; | |||
3714 | uint32_t *src_line, *src, s; | |||
3715 | int dst_stride, src_stride; | |||
3716 | int32_t w; | |||
3717 | uint32_t opaque, zero; | |||
3718 | ||||
3719 | __m128i ms; | |||
3720 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | |||
3721 | __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; | |||
3722 | ||||
3723 | PIXMAN_IMAGE_GET_LINE ( | |||
3724 | dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); | |||
3725 | PIXMAN_IMAGE_GET_LINE ( | |||
3726 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
3727 | ||||
3728 | while (height--) | |||
3729 | { | |||
3730 | dst = dst_line; | |||
3731 | dst_line += dst_stride; | |||
3732 | src = src_line; | |||
3733 | src_line += src_stride; | |||
3734 | w = width; | |||
3735 | ||||
3736 | while (w && (uintptr_t)dst & 15) | |||
3737 | { | |||
3738 | s = *src++; | |||
3739 | d = *dst; | |||
3740 | ||||
3741 | ms = unpack_32_1x128 (s); | |||
3742 | ||||
3743 | *dst++ = pack_565_32_16 ( | |||
3744 | pack_1x128_32 ( | |||
3745 | over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d)))); | |||
3746 | w--; | |||
3747 | } | |||
3748 | ||||
3749 | while (w >= 8) | |||
3750 | { | |||
3751 | /* First round */ | |||
3752 | xmm_src = load_128_unaligned ((__m128i*)src); | |||
3753 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
3754 | ||||
3755 | opaque = is_opaque (xmm_src); | |||
3756 | zero = is_zero (xmm_src); | |||
3757 | ||||
3758 | unpack_565_128_4x128 (xmm_dst, | |||
3759 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | |||
3760 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
3761 | ||||
3762 | /* preload next round */ | |||
3763 | xmm_src = load_128_unaligned ((__m128i*)(src + 4)); | |||
3764 | ||||
3765 | if (opaque) | |||
3766 | { | |||
3767 | invert_colors_2x128 (xmm_src_lo, xmm_src_hi, | |||
3768 | &xmm_dst0, &xmm_dst1); | |||
3769 | } | |||
3770 | else if (!zero) | |||
3771 | { | |||
3772 | over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, | |||
3773 | &xmm_dst0, &xmm_dst1); | |||
3774 | } | |||
3775 | ||||
3776 | /* Second round */ | |||
3777 | opaque = is_opaque (xmm_src); | |||
3778 | zero = is_zero (xmm_src); | |||
3779 | ||||
3780 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
3781 | ||||
3782 | if (opaque) | |||
3783 | { | |||
3784 | invert_colors_2x128 (xmm_src_lo, xmm_src_hi, | |||
3785 | &xmm_dst2, &xmm_dst3); | |||
3786 | } | |||
3787 | else if (!zero) | |||
3788 | { | |||
3789 | over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, | |||
3790 | &xmm_dst2, &xmm_dst3); | |||
3791 | } | |||
3792 | ||||
3793 | save_128_aligned ( | |||
3794 | (__m128i*)dst, pack_565_4x128_128 ( | |||
3795 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); | |||
3796 | ||||
3797 | w -= 8; | |||
3798 | src += 8; | |||
3799 | dst += 8; | |||
3800 | } | |||
3801 | ||||
3802 | while (w) | |||
3803 | { | |||
3804 | s = *src++; | |||
3805 | d = *dst; | |||
3806 | ||||
3807 | ms = unpack_32_1x128 (s); | |||
3808 | ||||
3809 | *dst++ = pack_565_32_16 ( | |||
3810 | pack_1x128_32 ( | |||
3811 | over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d)))); | |||
3812 | w--; | |||
3813 | } | |||
3814 | } | |||
3815 | ||||
3816 | } | |||
3817 | ||||
3818 | static void | |||
3819 | sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp, | |||
3820 | pixman_composite_info_t *info) | |||
3821 | { | |||
3822 | PIXMAN_COMPOSITE_ARGS (info); | |||
3823 | uint32_t *dst_line, *dst, d; | |||
3824 | uint32_t *src_line, *src, s; | |||
3825 | int dst_stride, src_stride; | |||
3826 | int32_t w; | |||
3827 | uint32_t opaque, zero; | |||
3828 | ||||
3829 | __m128i xmm_src_lo, xmm_src_hi; | |||
3830 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
3831 | ||||
3832 | PIXMAN_IMAGE_GET_LINE ( | |||
3833 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
3834 | PIXMAN_IMAGE_GET_LINE ( | |||
3835 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
3836 | ||||
3837 | while (height--) | |||
3838 | { | |||
3839 | dst = dst_line; | |||
3840 | dst_line += dst_stride; | |||
3841 | src = src_line; | |||
3842 | src_line += src_stride; | |||
3843 | w = width; | |||
3844 | ||||
3845 | while (w && (uintptr_t)dst & 15) | |||
3846 | { | |||
3847 | s = *src++; | |||
3848 | d = *dst; | |||
3849 | ||||
3850 | *dst++ = pack_1x128_32 ( | |||
3851 | over_rev_non_pre_1x128 ( | |||
3852 | unpack_32_1x128 (s), unpack_32_1x128 (d))); | |||
3853 | ||||
3854 | w--; | |||
3855 | } | |||
3856 | ||||
3857 | while (w >= 4) | |||
3858 | { | |||
3859 | xmm_src_hi = load_128_unaligned ((__m128i*)src); | |||
3860 | ||||
3861 | opaque = is_opaque (xmm_src_hi); | |||
3862 | zero = is_zero (xmm_src_hi); | |||
3863 | ||||
3864 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
3865 | ||||
3866 | if (opaque) | |||
3867 | { | |||
3868 | invert_colors_2x128 (xmm_src_lo, xmm_src_hi, | |||
3869 | &xmm_dst_lo, &xmm_dst_hi); | |||
3870 | ||||
3871 | save_128_aligned ( | |||
3872 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
3873 | } | |||
3874 | else if (!zero) | |||
3875 | { | |||
3876 | xmm_dst_hi = load_128_aligned ((__m128i*)dst); | |||
3877 | ||||
3878 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
3879 | ||||
3880 | over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, | |||
3881 | &xmm_dst_lo, &xmm_dst_hi); | |||
3882 | ||||
3883 | save_128_aligned ( | |||
3884 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
3885 | } | |||
3886 | ||||
3887 | w -= 4; | |||
3888 | dst += 4; | |||
3889 | src += 4; | |||
3890 | } | |||
3891 | ||||
3892 | while (w) | |||
3893 | { | |||
3894 | s = *src++; | |||
3895 | d = *dst; | |||
3896 | ||||
3897 | *dst++ = pack_1x128_32 ( | |||
3898 | over_rev_non_pre_1x128 ( | |||
3899 | unpack_32_1x128 (s), unpack_32_1x128 (d))); | |||
3900 | ||||
3901 | w--; | |||
3902 | } | |||
3903 | } | |||
3904 | ||||
3905 | } | |||
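/* Editor's note: both pixbuf paths (sse2_composite_over_pixbuf_0565 and
 * sse2_composite_over_pixbuf_8888) test each 4-pixel source block before
 * blending: if every pixel is fully opaque, the channel-swapped source
 * simply replaces the destination (invert_colors_2x128); if every pixel is
 * zero, the destination is left untouched; only the mixed case pays for the
 * full over_rev_non_pre math.  The sketch below shows how such block tests
 * can be done with SSE2 movemasks; the helpers are hypothetical and are not
 * pixman's own is_opaque()/is_zero().
 */
static int
all_alpha_0xff_sketch (__m128i four_argb_pixels)
{
    __m128i ones = _mm_cmpeq_epi8 (four_argb_pixels, four_argb_pixels);
    int eq_ff = _mm_movemask_epi8 (_mm_cmpeq_epi8 (four_argb_pixels, ones));

    /* alpha is the high byte of each 32-bit pixel: movemask bits 3, 7, 11, 15 */
    return (eq_ff & 0x8888) == 0x8888;
}

static int
all_pixels_zero_sketch (__m128i four_argb_pixels)
{
    int eq_zero = _mm_movemask_epi8 (
	_mm_cmpeq_epi8 (four_argb_pixels, _mm_setzero_si128 ()));

    return eq_zero == 0xffff;   /* every byte compared equal to 0 */
}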
3906 | ||||
3907 | static void | |||
3908 | sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, | |||
3909 | pixman_composite_info_t *info) | |||
3910 | { | |||
3911 | PIXMAN_COMPOSITE_ARGS (info); | |||
3912 | uint32_t src; | |||
3913 | uint16_t *dst_line, *dst, d; | |||
3914 | uint32_t *mask_line, *mask, m; | |||
3915 | int dst_stride, mask_stride; | |||
3916 | int w; | |||
3917 | uint32_t pack_cmp; | |||
3918 | ||||
3919 | __m128i xmm_src, xmm_alpha; | |||
3920 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
3921 | __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; | |||
3922 | ||||
3923 | __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; | |||
3924 | ||||
3925 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
3926 | ||||
3927 | if (src == 0) | |||
3928 | return; | |||
3929 | ||||
3930 | PIXMAN_IMAGE_GET_LINE ( | |||
3931 | dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); | |||
3932 | PIXMAN_IMAGE_GET_LINE ( | |||
3933 | mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); | |||
3934 | ||||
3935 | xmm_src = expand_pixel_32_1x128 (src); | |||
3936 | xmm_alpha = expand_alpha_1x128 (xmm_src); | |||
3937 | mmx_src = xmm_src; | |||
3938 | mmx_alpha = xmm_alpha; | |||
3939 | ||||
3940 | while (height--) | |||
3941 | { | |||
3942 | w = width; | |||
3943 | mask = mask_line; | |||
3944 | dst = dst_line; | |||
3945 | mask_line += mask_stride; | |||
3946 | dst_line += dst_stride; | |||
3947 | ||||
3948 | while (w && ((uintptr_t)dst & 15)) | |||
3949 | { | |||
3950 | m = *(uint32_t *) mask; | |||
3951 | ||||
3952 | if (m) | |||
3953 | { | |||
3954 | d = *dst; | |||
3955 | mmx_mask = unpack_32_1x128 (m); | |||
3956 | mmx_dest = expand565_16_1x128 (d); | |||
3957 | ||||
3958 | *dst = pack_565_32_16 ( | |||
3959 | pack_1x128_32 ( | |||
3960 | in_over_1x128 ( | |||
3961 | &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); | |||
3962 | } | |||
3963 | ||||
3964 | w--; | |||
3965 | dst++; | |||
3966 | mask++; | |||
3967 | } | |||
3968 | ||||
3969 | while (w >= 8) | |||
3970 | { | |||
3971 | /* First round */ | |||
3972 | xmm_mask = load_128_unaligned ((__m128i*)mask); | |||
3973 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
3974 | ||||
3975 | pack_cmp = _mm_movemask_epi8 ( | |||
3976 | _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); | |||
3977 | ||||
3978 | unpack_565_128_4x128 (xmm_dst, | |||
3979 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | |||
3980 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
3981 | ||||
3982 | /* preload next round */ | |||
3983 | xmm_mask = load_128_unaligned ((__m128i*)(mask + 4)); | |||
3984 | ||||
3985 | ||||
3986 | if (pack_cmp != 0xffff) | |||
3987 | { | |||
3988 | in_over_2x128 (&xmm_src, &xmm_src, | |||
3989 | &xmm_alpha, &xmm_alpha, | |||
3990 | &xmm_mask_lo, &xmm_mask_hi, | |||
3991 | &xmm_dst0, &xmm_dst1); | |||
3992 | } | |||
3993 | ||||
3994 | /* Second round */ | |||
3995 | pack_cmp = _mm_movemask_epi8 ( | |||
3996 | _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); | |||
3997 | ||||
3998 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
3999 | ||||
4000 | if (pack_cmp != 0xffff) | |||
4001 | { | |||
4002 | in_over_2x128 (&xmm_src, &xmm_src, | |||
4003 | &xmm_alpha, &xmm_alpha, | |||
4004 | &xmm_mask_lo, &xmm_mask_hi, | |||
4005 | &xmm_dst2, &xmm_dst3); | |||
4006 | } | |||
4007 | ||||
4008 | save_128_aligned ( | |||
4009 | (__m128i*)dst, pack_565_4x128_128 ( | |||
4010 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); | |||
4011 | ||||
4012 | w -= 8; | |||
4013 | dst += 8; | |||
4014 | mask += 8; | |||
4015 | } | |||
4016 | ||||
4017 | while (w) | |||
4018 | { | |||
4019 | m = *(uint32_t *) mask; | |||
4020 | ||||
4021 | if (m) | |||
4022 | { | |||
4023 | d = *dst; | |||
4024 | mmx_mask = unpack_32_1x128 (m); | |||
4025 | mmx_dest = expand565_16_1x128 (d); | |||
4026 | ||||
4027 | *dst = pack_565_32_16 ( | |||
4028 | pack_1x128_32 ( | |||
4029 | in_over_1x128 ( | |||
4030 | &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); | |||
4031 | } | |||
4032 | ||||
4033 | w--; | |||
4034 | dst++; | |||
4035 | mask++; | |||
4036 | } | |||
4037 | } | |||
4038 | ||||
4039 | } | |||
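/* Editor's note: the component-alpha 0565 path above skips work per 4-pixel
 * group with a movemask trick: _mm_cmpeq_epi32 against zero yields an
 * all-ones lane exactly where a mask pixel is 0, so a movemask of 0xffff
 * means the whole group is transparent and the in_over_2x128 call can be
 * skipped (the code tests pack_cmp != 0xffff).  A hedged sketch of that
 * test (hypothetical helper name, not part of pixman):
 */
static int
mask_group_is_zero_sketch (__m128i four_mask_pixels)
{
    int cmp = _mm_movemask_epi8 (
	_mm_cmpeq_epi32 (four_mask_pixels, _mm_setzero_si128 ()));

    return cmp == 0xffff;   /* every byte of every 32-bit lane equals 0 */
}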
4040 | ||||
4041 | static void | |||
4042 | sse2_composite_in_n_8_8 (pixman_implementation_t *imp, | |||
4043 | pixman_composite_info_t *info) | |||
4044 | { | |||
4045 | PIXMAN_COMPOSITE_ARGS (info); | |||
4046 | uint8_t *dst_line, *dst; | |||
4047 | uint8_t *mask_line, *mask; | |||
4048 | int dst_stride, mask_stride; | |||
4049 | uint32_t d; | |||
4050 | uint32_t src; | |||
4051 | int32_t w; | |||
4052 | ||||
4053 | __m128i xmm_alpha; | |||
4054 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
4055 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
4056 | ||||
4057 | PIXMAN_IMAGE_GET_LINE ( | |||
4058 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); | |||
4059 | PIXMAN_IMAGE_GET_LINE ( | |||
4060 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); | |||
4061 | ||||
4062 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
4063 | ||||
4064 | xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); | |||
4065 | ||||
4066 | while (height--) | |||
4067 | { | |||
4068 | dst = dst_line; | |||
4069 | dst_line += dst_stride; | |||
4070 | mask = mask_line; | |||
4071 | mask_line += mask_stride; | |||
4072 | w = width; | |||
4073 | ||||
4074 | while (w && ((uintptr_t)dst & 15)) | |||
4075 | { | |||
4076 | uint8_t m = *mask++; | |||
4077 | d = (uint32_t) *dst; | |||
4078 | ||||
4079 | *dst++ = (uint8_t) pack_1x128_32 ( | |||
4080 | pix_multiply_1x128 ( | |||
4081 | pix_multiply_1x128 (xmm_alpha, | |||
4082 | unpack_32_1x128 (m)), | |||
4083 | unpack_32_1x128 (d))); | |||
4084 | w--; | |||
4085 | } | |||
4086 | ||||
4087 | while (w >= 16) | |||
4088 | { | |||
4089 | xmm_mask = load_128_unaligned ((__m128i*)mask); | |||
4090 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
4091 | ||||
4092 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
4093 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
4094 | ||||
4095 | pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, | |||
4096 | &xmm_mask_lo, &xmm_mask_hi, | |||
4097 | &xmm_mask_lo, &xmm_mask_hi); | |||
4098 | ||||
4099 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | |||
4100 | &xmm_dst_lo, &xmm_dst_hi, | |||
4101 | &xmm_dst_lo, &xmm_dst_hi); | |||
4102 | ||||
4103 | save_128_aligned ( | |||
4104 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
4105 | ||||
4106 | mask += 16; | |||
4107 | dst += 16; | |||
4108 | w -= 16; | |||
4109 | } | |||
4110 | ||||
4111 | while (w) | |||
4112 | { | |||
4113 | uint8_t m = *mask++; | |||
4114 | d = (uint32_t) *dst; | |||
4115 | ||||
4116 | *dst++ = (uint8_t) pack_1x128_32 ( | |||
4117 | pix_multiply_1x128 ( | |||
4118 | pix_multiply_1x128 ( | |||
4119 | xmm_alpha, unpack_32_1x128 (m)), | |||
4120 | unpack_32_1x128 (d))); | |||
4121 | w--; | |||
4122 | } | |||
4123 | } | |||
4124 | ||||
4125 | } | |||
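/* Editor's note: sse2_composite_in_n_8_8 reduces to two 8-bit multiplies
 * per pixel: dst = (src_alpha * mask / 255) * dst / 255.  The sketch below
 * uses the usual exact "divide by 255 with rounding" identity
 * t = a*b + 0x80; result = (t + (t >> 8)) >> 8.  The helper names are mine,
 * not pixman's, and the sketch is illustrative only.
 */
static uint8_t
mul_un8_sketch (uint8_t a, uint8_t b)
{
    uint32_t t = (uint32_t) a * b + 0x80;

    return (uint8_t) ((t + (t >> 8)) >> 8);   /* round (a * b / 255) exactly */
}

static uint8_t
in_n_8_8_scalar_sketch (uint8_t src_alpha, uint8_t mask, uint8_t dst)
{
    return mul_un8_sketch (mul_un8_sketch (src_alpha, mask), dst);
}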
4126 | ||||
4127 | static void | |||
4128 | sse2_composite_in_n_8 (pixman_implementation_t *imp, | |||
4129 | pixman_composite_info_t *info) | |||
4130 | { | |||
4131 | PIXMAN_COMPOSITE_ARGS (info); | |||
4132 | uint8_t *dst_line, *dst; | |||
4133 | int dst_stride; | |||
4134 | uint32_t d; | |||
4135 | uint32_t src; | |||
4136 | int32_t w; | |||
4137 | ||||
4138 | __m128i xmm_alpha; | |||
4139 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
4140 | ||||
4141 | PIXMAN_IMAGE_GET_LINE ( | |||
4142 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); | |||
4143 | ||||
4144 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
4145 | ||||
4146 | xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); | |||
4147 | ||||
4148 | src = src >> 24; | |||
4149 | ||||
4150 | if (src == 0xff) | |||
4151 | return; | |||
4152 | ||||
4153 | if (src == 0x00) | |||
4154 | { | |||
4155 | pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, | |||
4156 | 8, dest_x, dest_y, width, height, src); | |||
4157 | ||||
4158 | return; | |||
4159 | } | |||
4160 | ||||
4161 | while (height--) | |||
4162 | { | |||
4163 | dst = dst_line; | |||
4164 | dst_line += dst_stride; | |||
4165 | w = width; | |||
4166 | ||||
4167 | while (w && ((uintptr_t)dst & 15)) | |||
4168 | { | |||
4169 | d = (uint32_t) *dst; | |||
4170 | ||||
4171 | *dst++ = (uint8_t) pack_1x128_32 ( | |||
4172 | pix_multiply_1x128 ( | |||
4173 | xmm_alpha, | |||
4174 | unpack_32_1x128 (d))); | |||
4175 | w--; | |||
4176 | } | |||
4177 | ||||
4178 | while (w >= 16) | |||
4179 | { | |||
4180 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
4181 | ||||
4182 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
4183 | ||||
4184 | pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, | |||
4185 | &xmm_dst_lo, &xmm_dst_hi, | |||
4186 | &xmm_dst_lo, &xmm_dst_hi); | |||
4187 | ||||
4188 | save_128_aligned ( | |||
4189 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
4190 | ||||
4191 | dst += 16; | |||
4192 | w -= 16; | |||
4193 | } | |||
4194 | ||||
4195 | while (w) | |||
4196 | { | |||
4197 | d = (uint32_t) *dst; | |||
4198 | ||||
4199 | *dst++ = (uint8_t) pack_1x128_32 ( | |||
4200 | pix_multiply_1x128 ( | |||
4201 | xmm_alpha, | |||
4202 | unpack_32_1x128 (d))); | |||
4203 | w--; | |||
4204 | } | |||
4205 | } | |||
4206 | ||||
4207 | } | |||
4208 | ||||
4209 | static void | |||
4210 | sse2_composite_in_8_8 (pixman_implementation_t *imp, | |||
4211 | pixman_composite_info_t *info) | |||
4212 | { | |||
4213 | PIXMAN_COMPOSITE_ARGS (info); | |||
4214 | uint8_t *dst_line, *dst; | |||
4215 | uint8_t *src_line, *src; | |||
4216 | int src_stride, dst_stride; | |||
4217 | int32_t w; | |||
4218 | uint32_t s, d; | |||
4219 | ||||
4220 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | |||
4221 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
4222 | ||||
4223 | PIXMAN_IMAGE_GET_LINE ( | |||
4224 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); | |||
4225 | PIXMAN_IMAGE_GET_LINE ( | |||
4226 | src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); | |||
4227 | ||||
4228 | while (height--) | |||
4229 | { | |||
4230 | dst = dst_line; | |||
4231 | dst_line += dst_stride; | |||
4232 | src = src_line; | |||
4233 | src_line += src_stride; | |||
4234 | w = width; | |||
4235 | ||||
4236 | while (w && ((uintptr_t)dst & 15)) | |||
4237 | { | |||
4238 | s = (uint32_t) *src++; | |||
4239 | d = (uint32_t) *dst; | |||
4240 | ||||
4241 | *dst++ = (uint8_t) pack_1x128_32 ( | |||
4242 | pix_multiply_1x128 ( | |||
4243 | unpack_32_1x128 (s), unpack_32_1x128 (d))); | |||
4244 | w--; | |||
4245 | } | |||
4246 | ||||
4247 | while (w >= 16) | |||
4248 | { | |||
4249 | xmm_src = load_128_unaligned ((__m128i*)src); | |||
4250 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
4251 | ||||
4252 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
4253 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
4254 | ||||
4255 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
4256 | &xmm_dst_lo, &xmm_dst_hi, | |||
4257 | &xmm_dst_lo, &xmm_dst_hi); | |||
4258 | ||||
4259 | save_128_aligned ( | |||
4260 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
4261 | ||||
4262 | src += 16; | |||
4263 | dst += 16; | |||
4264 | w -= 16; | |||
4265 | } | |||
4266 | ||||
4267 | while (w) | |||
4268 | { | |||
4269 | s = (uint32_t) *src++; | |||
4270 | d = (uint32_t) *dst; | |||
4271 | ||||
4272 | *dst++ = (uint8_t) pack_1x128_32 ( | |||
4273 | pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d))); | |||
4274 | w--; | |||
4275 | } | |||
4276 | } | |||
4277 | ||||
4278 | } | |||
4279 | ||||
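/* sse2_composite_add_n_8_8: ADD operator with a solid source, an a8 mask and
 * an a8 destination.  The solid colour's alpha is multiplied by each mask
 * byte and added to the destination with unsigned saturation
 * (_mm_adds_epu16 on the unpacked 16-bit lanes). */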
4280 | static void | |||
4281 | sse2_composite_add_n_8_8 (pixman_implementation_t *imp, | |||
4282 | pixman_composite_info_t *info) | |||
4283 | { | |||
4284 | PIXMAN_COMPOSITE_ARGS (info); | |||
4285 | uint8_t *dst_line, *dst; | |||
4286 | uint8_t *mask_line, *mask; | |||
4287 | int dst_stride, mask_stride; | |||
4288 | int32_t w; | |||
4289 | uint32_t src; | |||
4290 | uint32_t d; | |||
4291 | ||||
4292 | __m128i xmm_alpha; | |||
4293 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
4294 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
4295 | ||||
4296 | PIXMAN_IMAGE_GET_LINE ( | |||
4297 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); | |||
4298 | PIXMAN_IMAGE_GET_LINE ( | |||
4299 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); | |||
4300 | ||||
4301 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
4302 | ||||
4303 | xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); | |||
4304 | ||||
4305 | while (height--) | |||
4306 | { | |||
4307 | dst = dst_line; | |||
4308 | dst_line += dst_stride; | |||
4309 | mask = mask_line; | |||
4310 | mask_line += mask_stride; | |||
4311 | w = width; | |||
4312 | ||||
4313 | while (w && ((uintptr_t)dst & 15)) | |||
4314 | { | |||
4315 | uint8_t m = *mask++; | |||
4316 | d = (uint32_t) *dst; | |||
4317 | ||||
4318 | *dst++ = (uint8_t) pack_1x128_32 ( | |||
4319 | _mm_adds_epu16 ( | |||
4320 | pix_multiply_1x128 ( | |||
4321 | xmm_alpha, unpack_32_1x128 (m)), | |||
4322 | unpack_32_1x128 (d))); | |||
4323 | w--; | |||
4324 | } | |||
4325 | ||||
4326 | while (w >= 16) | |||
4327 | { | |||
4328 | xmm_mask = load_128_unaligned ((__m128i*)mask); | |||
4329 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
4330 | ||||
4331 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
4332 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
4333 | ||||
4334 | pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, | |||
4335 | &xmm_mask_lo, &xmm_mask_hi, | |||
4336 | &xmm_mask_lo, &xmm_mask_hi); | |||
4337 | ||||
4338 | xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); | |||
4339 | xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); | |||
4340 | ||||
4341 | save_128_aligned ( | |||
4342 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
4343 | ||||
4344 | mask += 16; | |||
4345 | dst += 16; | |||
4346 | w -= 16; | |||
4347 | } | |||
4348 | ||||
4349 | while (w) | |||
4350 | { | |||
4351 | uint8_t m = (uint32_t) *mask++; | |||
4352 | d = (uint32_t) *dst; | |||
4353 | ||||
4354 | *dst++ = (uint8_t) pack_1x128_32 ( | |||
4355 | _mm_adds_epu16 ( | |||
4356 | pix_multiply_1x128 ( | |||
4357 | xmm_alpha, unpack_32_1x128 (m)), | |||
4358 | unpack_32_1x128 (d))); | |||
4359 | ||||
4360 | w--; | |||
4361 | } | |||
4362 | } | |||
4363 | ||||
4364 | } | |||
4365 | ||||
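/* sse2_composite_add_n_8: ADD operator with a solid source and an a8
 * destination, no mask.  The source alpha is replicated into all 16 byte
 * lanes and added to the destination with _mm_adds_epu8; alpha 0x00 is a
 * no-op and alpha 0xff degenerates into a plain fill. */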
4366 | static void | |||
4367 | sse2_composite_add_n_8 (pixman_implementation_t *imp, | |||
4368 | pixman_composite_info_t *info) | |||
4369 | { | |||
4370 | PIXMAN_COMPOSITE_ARGS (info); | |||
4371 | uint8_t *dst_line, *dst; | |||
4372 | int dst_stride; | |||
4373 | int32_t w; | |||
4374 | uint32_t src; | |||
4375 | ||||
4376 | __m128i xmm_src; | |||
4377 | ||||
4378 | PIXMAN_IMAGE_GET_LINE ( | |||
4379 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); | |||
4380 | ||||
4381 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
4382 | ||||
4383 | src >>= 24; | |||
4384 | ||||
4385 | if (src == 0x00) | |||
4386 | return; | |||
4387 | ||||
4388 | if (src == 0xff) | |||
4389 | { | |||
4390 | pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, | |||
4391 | 8, dest_x, dest_y, width, height, 0xff); | |||
4392 | ||||
4393 | return; | |||
4394 | } | |||
4395 | ||||
4396 | src = (src << 24) | (src << 16) | (src << 8) | src; | |||
4397 | xmm_src = _mm_set_epi32 (src, src, src, src); | |||
4398 | ||||
4399 | while (height--) | |||
4400 | { | |||
4401 | dst = dst_line; | |||
4402 | dst_line += dst_stride; | |||
4403 | w = width; | |||
4404 | ||||
4405 | while (w && ((uintptr_t)dst & 15)) | |||
4406 | { | |||
4407 | *dst = (uint8_t)_mm_cvtsi128_si32 ( | |||
4408 | _mm_adds_epu8 ( | |||
4409 | xmm_src, | |||
4410 | _mm_cvtsi32_si128 (*dst))); | |||
4411 | ||||
4412 | w--; | |||
4413 | dst++; | |||
4414 | } | |||
4415 | ||||
4416 | while (w >= 16) | |||
4417 | { | |||
4418 | save_128_aligned ( | |||
4419 | (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); | |||
4420 | ||||
4421 | dst += 16; | |||
4422 | w -= 16; | |||
4423 | } | |||
4424 | ||||
4425 | while (w) | |||
4426 | { | |||
4427 | *dst = (uint8_t)_mm_cvtsi128_si32 ( | |||
4428 | _mm_adds_epu8 ( | |||
4429 | xmm_src, | |||
4430 | _mm_cvtsi32_si128 (*dst))); | |||
4431 | ||||
4432 | w--; | |||
4433 | dst++; | |||
4434 | } | |||
4435 | } | |||
4436 | ||||
4437 | } | |||
4438 | ||||
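/* sse2_composite_add_8_8: ADD operator with an a8 source and an a8
 * destination.  Unaligned head and tail bytes use a scalar saturating add;
 * the aligned middle is handed to sse2_combine_add_u four bytes at a time. */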
4439 | static void | |||
4440 | sse2_composite_add_8_8 (pixman_implementation_t *imp, | |||
4441 | pixman_composite_info_t *info) | |||
4442 | { | |||
4443 | PIXMAN_COMPOSITE_ARGS (info); | |||
4444 | uint8_t *dst_line, *dst; | |||
4445 | uint8_t *src_line, *src; | |||
4446 | int dst_stride, src_stride; | |||
4447 | int32_t w; | |||
4448 | uint16_t t; | |||
4449 | ||||
4450 | PIXMAN_IMAGE_GET_LINE ( | |||
4451 | src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); | |||
4452 | PIXMAN_IMAGE_GET_LINE ( | |||
4453 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); | |||
4454 | ||||
4455 | while (height--) | |||
4456 | { | |||
4457 | dst = dst_line; | |||
4458 | src = src_line; | |||
4459 | ||||
4460 | dst_line += dst_stride; | |||
4461 | src_line += src_stride; | |||
4462 | w = width; | |||
4463 | ||||
4464 | /* Small head */ | |||
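/* t holds the 9-bit sum of two 8-bit values, so (t >> 8) is 1 exactly when
 * the add overflows; (0 - (t >> 8)) is then all ones, and OR-ing it in
 * clamps the stored byte to 0xff, i.e. an unsigned saturating add. */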
4465 | while (w && (uintptr_t)dst & 3) | |||
4466 | { | |||
4467 | t = (*dst) + (*src++); | |||
4468 | *dst++ = t | (0 - (t >> 8)); | |||
4469 | w--; | |||
4470 | } | |||
4471 | ||||
4472 | sse2_combine_add_u (imp, op, | |||
4473 | (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2); | |||
4474 | ||||
4475 | /* Small tail */ | |||
4476 | dst += w & 0xfffc; | |||
4477 | src += w & 0xfffc; | |||
4478 | ||||
4479 | w &= 3; | |||
4480 | ||||
4481 | while (w) | |||
4482 | { | |||
4483 | t = (*dst) + (*src++); | |||
4484 | *dst++ = t | (0 - (t >> 8)); | |||
4485 | w--; | |||
4486 | } | |||
4487 | } | |||
4488 | ||||
4489 | } | |||
4490 | ||||
4491 | static void | |||
4492 | sse2_composite_add_8888_8888 (pixman_implementation_t *imp, | |||
4493 | pixman_composite_info_t *info) | |||
4494 | { | |||
4495 | PIXMAN_COMPOSITE_ARGS (info); | |||
4496 | uint32_t *dst_line, *dst; | |||
4497 | uint32_t *src_line, *src; | |||
4498 | int dst_stride, src_stride; | |||
4499 | ||||
4500 | PIXMAN_IMAGE_GET_LINE ( | |||
4501 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
4502 | PIXMAN_IMAGE_GET_LINE ( | |||
4503 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
4504 | ||||
4505 | while (height--) | |||
4506 | { | |||
4507 | dst = dst_line; | |||
4508 | dst_line += dst_stride; | |||
4509 | src = src_line; | |||
4510 | src_line += src_stride; | |||
4511 | ||||
4512 | sse2_combine_add_u (imp, op, dst, src, NULL, width); | |||
4513 | } | |||
4514 | } | |||
4515 | ||||
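/* sse2_composite_add_n_8888: ADD operator with a solid source and an
 * a8r8g8b8 destination.  A zero source is a no-op and an all-ones source is
 * a plain fill; otherwise the solid pixel is broadcast into an XMM register
 * and added to four destination pixels per iteration with _mm_adds_epu8. */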
4516 | static void | |||
4517 | sse2_composite_add_n_8888 (pixman_implementation_t *imp, | |||
4518 | pixman_composite_info_t *info) | |||
4519 | { | |||
4520 | PIXMAN_COMPOSITE_ARGS (info); | |||
4521 | uint32_t *dst_line, *dst, src; | |||
4522 | int dst_stride; | |||
4523 | ||||
4524 | __m128i xmm_src; | |||
4525 | ||||
4526 | PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
4527 | ||||
4528 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
4529 | if (src == 0) | |||
4530 | return; | |||
4531 | ||||
4532 | if (src == ~0) | |||
4533 | { | |||
4534 | pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, | |||
4535 | dest_x, dest_y, width, height, ~0); | |||
4536 | ||||
4537 | return; | |||
4538 | } | |||
4539 | ||||
4540 | xmm_src = _mm_set_epi32 (src, src, src, src); | |||
4541 | while (height--) | |||
4542 | { | |||
4543 | int w = width; | |||
4544 | uint32_t d; | |||
4545 | ||||
4546 | dst = dst_line; | |||
4547 | dst_line += dst_stride; | |||
4548 | ||||
4549 | while (w && (uintptr_t)dst & 15) | |||
4550 | { | |||
4551 | d = *dst; | |||
4552 | *dst++ = | |||
4553 | _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d))); | |||
4554 | w--; | |||
4555 | } | |||
4556 | ||||
4557 | while (w >= 4) | |||
4558 | { | |||
4559 | save_128_aligned | |||
4560 | ((__m128i*)dst, | |||
4561 | _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); | |||
4562 | ||||
4563 | dst += 4; | |||
4564 | w -= 4; | |||
4565 | } | |||
4566 | ||||
4567 | while (w--) | |||
4568 | { | |||
4569 | d = *dst; | |||
4570 | *dst++ = | |||
4571 | _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src, | |||
4572 | _mm_cvtsi32_si128 (d))); | |||
4573 | } | |||
4574 | } | |||
4575 | } | |||
4576 | ||||
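/* sse2_composite_add_n_8_8888: ADD operator with a solid source, an a8 mask
 * and an a8r8g8b8/x8r8g8b8 destination.  The source is multiplied by the
 * expanded mask byte and added to the destination; blocks of four pixels
 * whose mask bytes are all zero are skipped entirely. */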
4577 | static void | |||
4578 | sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, | |||
4579 | pixman_composite_info_t *info) | |||
4580 | { | |||
4581 | PIXMAN_COMPOSITE_ARGS (info); | |||
4582 | uint32_t *dst_line, *dst; | |||
4583 | uint8_t *mask_line, *mask; | |||
4584 | int dst_stride, mask_stride; | |||
4585 | int32_t w; | |||
4586 | uint32_t src; | |||
4587 | ||||
4588 | __m128i xmm_src; | |||
4589 | ||||
4590 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
4591 | if (src == 0) | |||
4592 | return; | |||
4593 | xmm_src = expand_pixel_32_1x128 (src); | |||
4594 | ||||
4595 | PIXMAN_IMAGE_GET_LINE ( | |||
4596 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
4597 | PIXMAN_IMAGE_GET_LINE ( | |||
4598 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); | |||
4599 | ||||
4600 | while (height--) | |||
4601 | { | |||
4602 | dst = dst_line; | |||
4603 | dst_line += dst_stride; | |||
4604 | mask = mask_line; | |||
4605 | mask_line += mask_stride; | |||
4606 | w = width; | |||
4607 | ||||
4608 | while (w && ((uintptr_t)dst & 15)) | |||
4609 | { | |||
4610 | uint8_t m = *mask++; | |||
4611 | if (m) | |||
4612 | { | |||
4613 | *dst = pack_1x128_32 | |||
4614 | (_mm_adds_epu16 | |||
4615 | (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), | |||
4616 | unpack_32_1x128 (*dst))); | |||
4617 | } | |||
4618 | dst++; | |||
4619 | w--; | |||
4620 | } | |||
4621 | ||||
4622 | while (w >= 4) | |||
4623 | { | |||
4624 | uint32_t m; | |||
4625 | memcpy(&m, mask, sizeof(uint32_t)); | |||
4626 | ||||
4627 | if (m) | |||
4628 | { | |||
4629 | __m128i xmm_mask_lo, xmm_mask_hi; | |||
4630 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
4631 | ||||
4632 | __m128i xmm_dst = load_128_aligned ((__m128i*)dst); | |||
4633 | __m128i xmm_mask = | |||
4634 | _mm_unpacklo_epi8 (unpack_32_1x128(m), | |||
4635 | _mm_setzero_si128 ()); | |||
4636 | ||||
4637 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
4638 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
4639 | ||||
4640 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, | |||
4641 | &xmm_mask_lo, &xmm_mask_hi); | |||
4642 | ||||
4643 | pix_multiply_2x128 (&xmm_src, &xmm_src, | |||
4644 | &xmm_mask_lo, &xmm_mask_hi, | |||
4645 | &xmm_mask_lo, &xmm_mask_hi); | |||
4646 | ||||
4647 | xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); | |||
4648 | xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); | |||
4649 | ||||
4650 | save_128_aligned ( | |||
4651 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
4652 | } | |||
4653 | ||||
4654 | w -= 4; | |||
4655 | dst += 4; | |||
4656 | mask += 4; | |||
4657 | } | |||
4658 | ||||
4659 | while (w) | |||
4660 | { | |||
4661 | uint8_t m = *mask++; | |||
4662 | if (m) | |||
4663 | { | |||
4664 | *dst = pack_1x128_32 | |||
4665 | (_mm_adds_epu16 | |||
4666 | (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), | |||
4667 | unpack_32_1x128 (*dst))); | |||
4668 | } | |||
4669 | dst++; | |||
4670 | w--; | |||
4671 | } | |||
4672 | } | |||
4673 | } | |||
4674 | ||||
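/* sse2_blt: plain rectangle copy between two bit images of equal depth
 * (16 or 32 bpp only).  Each row is copied with small memmoves until the
 * destination is 16-byte aligned, then 64 bytes per iteration through XMM
 * registers, then progressively smaller chunks for the tail. */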
4675 | static pixman_bool_t | |||
4676 | sse2_blt (pixman_implementation_t *imp, | |||
4677 | uint32_t * src_bits, | |||
4678 | uint32_t * dst_bits, | |||
4679 | int src_stride, | |||
4680 | int dst_stride, | |||
4681 | int src_bpp, | |||
4682 | int dst_bpp, | |||
4683 | int src_x, | |||
4684 | int src_y, | |||
4685 | int dest_x, | |||
4686 | int dest_y, | |||
4687 | int width, | |||
4688 | int height) | |||
4689 | { | |||
4690 | uint8_t * src_bytes; | |||
4691 | uint8_t * dst_bytes; | |||
4692 | int byte_width; | |||
4693 | ||||
4694 | if (src_bpp != dst_bpp) | |||
4695 | return FALSE; | |||
4696 | ||||
4697 | if (src_bpp == 16) | |||
4698 | { | |||
4699 | src_stride = src_stride * (int) sizeof (uint32_t) / 2; | |||
4700 | dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; | |||
4701 | src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); | |||
4702 | dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); | |||
4703 | byte_width = 2 * width; | |||
4704 | src_stride *= 2; | |||
4705 | dst_stride *= 2; | |||
4706 | } | |||
4707 | else if (src_bpp == 32) | |||
4708 | { | |||
4709 | src_stride = src_stride * (int) sizeof (uint32_t) / 4; | |||
4710 | dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; | |||
4711 | src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); | |||
4712 | dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); | |||
4713 | byte_width = 4 * width; | |||
4714 | src_stride *= 4; | |||
4715 | dst_stride *= 4; | |||
4716 | } | |||
4717 | else | |||
4718 | { | |||
4719 | return FALSE; | |||
4720 | } | |||
4721 | ||||
4722 | while (height--) | |||
4723 | { | |||
4724 | int w; | |||
4725 | uint8_t *s = src_bytes; | |||
4726 | uint8_t *d = dst_bytes; | |||
4727 | src_bytes += src_stride; | |||
4728 | dst_bytes += dst_stride; | |||
4729 | w = byte_width; | |||
4730 | ||||
4731 | while (w >= 2 && ((uintptr_t)d & 3)) | |||
4732 | { | |||
4733 | memmove(d, s, 2); | |||
4734 | w -= 2; | |||
4735 | s += 2; | |||
4736 | d += 2; | |||
4737 | } | |||
4738 | ||||
4739 | while (w >= 4 && ((uintptr_t)d & 15)) | |||
4740 | { | |||
4741 | memmove(d, s, 4); | |||
4742 | ||||
4743 | w -= 4; | |||
4744 | s += 4; | |||
4745 | d += 4; | |||
4746 | } | |||
4747 | ||||
4748 | while (w >= 64) | |||
4749 | { | |||
4750 | __m128i xmm0, xmm1, xmm2, xmm3; | |||
4751 | ||||
4752 | xmm0 = load_128_unaligned ((__m128i*)(s)); | |||
4753 | xmm1 = load_128_unaligned ((__m128i*)(s + 16)); | |||
4754 | xmm2 = load_128_unaligned ((__m128i*)(s + 32)); | |||
4755 | xmm3 = load_128_unaligned ((__m128i*)(s + 48)); | |||
4756 | ||||
4757 | save_128_aligned ((__m128i*)(d), xmm0); | |||
4758 | save_128_aligned ((__m128i*)(d + 16), xmm1); | |||
4759 | save_128_aligned ((__m128i*)(d + 32), xmm2); | |||
4760 | save_128_aligned ((__m128i*)(d + 48), xmm3); | |||
4761 | ||||
4762 | s += 64; | |||
4763 | d += 64; | |||
4764 | w -= 64; | |||
4765 | } | |||
4766 | ||||
4767 | while (w >= 16) | |||
4768 | { | |||
4769 | save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) ); | |||
4770 | ||||
4771 | w -= 16; | |||
4772 | d += 16; | |||
4773 | s += 16; | |||
4774 | } | |||
4775 | ||||
4776 | while (w >= 4) | |||
4777 | { | |||
4778 | memmove(d, s, 4); | |||
4779 | ||||
4780 | w -= 4; | |||
4781 | s += 4; | |||
4782 | d += 4; | |||
4783 | } | |||
4784 | ||||
4785 | if (w >= 2) | |||
4786 | { | |||
4787 | memmove(d, s, 2); | |||
4788 | w -= 2; | |||
4789 | s += 2; | |||
4790 | d += 2; | |||
4791 | } | |||
4792 | } | |||
4793 | ||||
4794 | return TRUE; | |||
4795 | } | |||
4796 | ||||
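/* sse2_composite_copy_area: SRC copy between same-depth images, implemented
 * by forwarding straight to sse2_blt. */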
4797 | static void | |||
4798 | sse2_composite_copy_area (pixman_implementation_t *imp, | |||
4799 | pixman_composite_info_t *info) | |||
4800 | { | |||
4801 | PIXMAN_COMPOSITE_ARGS (info); | |||
4802 | sse2_blt (imp, src_image->bits.bits, | |||
4803 | dest_image->bits.bits, | |||
4804 | src_image->bits.rowstride, | |||
4805 | dest_image->bits.rowstride, | |||
4806 | PIXMAN_FORMAT_BPP (src_image->bits.format), | |||
4807 | PIXMAN_FORMAT_BPP (dest_image->bits.format), | |||
4808 | src_x, src_y, dest_x, dest_y, width, height); | |||
4809 | } | |||
4810 | ||||
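/* sse2_composite_over_x888_8_8888: OVER operator with an x8r8g8b8 source
 * (its alpha is forced to 0xff), an a8 mask and an a8r8g8b8 destination.
 * A mask of 0xffffffff for four pixels lets the source be stored directly;
 * otherwise the in_over helpers blend source, mask and destination. */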
4811 | static void | |||
4812 | sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, | |||
4813 | pixman_composite_info_t *info) | |||
4814 | { | |||
4815 | PIXMAN_COMPOSITE_ARGS (info); | |||
4816 | uint32_t *src, *src_line, s; | |||
4817 | uint32_t *dst, *dst_line, d; | |||
4818 | uint8_t *mask, *mask_line; | |||
4819 | int src_stride, mask_stride, dst_stride; | |||
4820 | int32_t w; | |||
4821 | __m128i ms; | |||
4822 | ||||
4823 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | |||
4824 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
4825 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
4826 | ||||
4827 | PIXMAN_IMAGE_GET_LINE ( | |||
4828 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
4829 | PIXMAN_IMAGE_GET_LINE ( | |||
4830 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); | |||
4831 | PIXMAN_IMAGE_GET_LINE ( | |||
4832 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
4833 | ||||
4834 | while (height--) | |||
4835 | { | |||
4836 | src = src_line; | |||
4837 | src_line += src_stride; | |||
4838 | dst = dst_line; | |||
4839 | dst_line += dst_stride; | |||
4840 | mask = mask_line; | |||
4841 | mask_line += mask_stride; | |||
4842 | ||||
4843 | w = width; | |||
4844 | ||||
4845 | while (w && (uintptr_t)dst & 15) | |||
4846 | { | |||
4847 | uint8_t m = *mask++; | |||
4848 | s = 0xff000000 | *src++; | |||
4849 | d = *dst; | |||
4850 | ms = unpack_32_1x128 (s); | |||
4851 | ||||
4852 | if (m != 0xff) | |||
4853 | { | |||
4854 | __m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); | |||
4855 | __m128i md = unpack_32_1x128 (d); | |||
4856 | ||||
4857 | ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md); | |||
4858 | } | |||
4859 | ||||
4860 | *dst++ = pack_1x128_32 (ms); | |||
4861 | w--; | |||
4862 | } | |||
4863 | ||||
4864 | while (w >= 4) | |||
4865 | { | |||
4866 | uint32_t m; | |||
4867 | memcpy(&m, mask, sizeof(uint32_t)); | |||
4868 | xmm_src = _mm_or_si128 ( | |||
4869 | load_128_unaligned ((__m128i*)src), mask_ff000000); | |||
4870 | ||||
4871 | if (m == 0xffffffff) | |||
4872 | { | |||
4873 | save_128_aligned ((__m128i*)dst, xmm_src); | |||
4874 | } | |||
4875 | else | |||
4876 | { | |||
4877 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
4878 | ||||
4879 | xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); | |||
4880 | ||||
4881 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
4882 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
4883 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
4884 | ||||
4885 | expand_alpha_rev_2x128 ( | |||
4886 | xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
4887 | ||||
4888 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
4889 | &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, | |||
4890 | &xmm_dst_lo, &xmm_dst_hi); | |||
4891 | ||||
4892 | save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
4893 | } | |||
4894 | ||||
4895 | src += 4; | |||
4896 | dst += 4; | |||
4897 | mask += 4; | |||
4898 | w -= 4; | |||
4899 | } | |||
4900 | ||||
4901 | while (w) | |||
4902 | { | |||
4903 | uint8_t m = *mask++; | |||
4904 | ||||
4905 | if (m) | |||
4906 | { | |||
4907 | s = 0xff000000 | *src; | |||
4908 | ||||
4909 | if (m == 0xff) | |||
4910 | { | |||
4911 | *dst = s; | |||
4912 | } | |||
4913 | else | |||
4914 | { | |||
4915 | __m128i ma, md, ms; | |||
4916 | ||||
4917 | d = *dst; | |||
4918 | ||||
4919 | ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); | |||
4920 | md = unpack_32_1x128 (d); | |||
4921 | ms = unpack_32_1x128 (s); | |||
4922 | ||||
4923 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md)); | |||
4924 | } | |||
4925 | ||||
4926 | } | |||
4927 | ||||
4928 | src++; | |||
4929 | dst++; | |||
4930 | w--; | |||
4931 | } | |||
4932 | } | |||
4933 | ||||
4934 | } | |||
4935 | ||||
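/* sse2_composite_over_8888_8_8888: OVER operator with an a8r8g8b8 source,
 * an a8 mask and an a8r8g8b8 destination.  Fully opaque source pixels under
 * a fully set mask are stored directly; everything else goes through
 * in_over using the source's own alpha. */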
4936 | static void | |||
4937 | sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, | |||
4938 | pixman_composite_info_t *info) | |||
4939 | { | |||
4940 | PIXMAN_COMPOSITE_ARGS (info); | |||
4941 | uint32_t *src, *src_line, s; | |||
4942 | uint32_t *dst, *dst_line, d; | |||
4943 | uint8_t *mask, *mask_line; | |||
4944 | int src_stride, mask_stride, dst_stride; | |||
4945 | int32_t w; | |||
4946 | ||||
4947 | __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; | |||
4948 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
4949 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
4950 | ||||
4951 | PIXMAN_IMAGE_GET_LINE ( | |||
4952 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
4953 | PIXMAN_IMAGE_GET_LINE ( | |||
4954 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); | |||
4955 | PIXMAN_IMAGE_GET_LINE ( | |||
4956 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
4957 | ||||
4958 | while (height--) | |||
4959 | { | |||
4960 | src = src_line; | |||
4961 | src_line += src_stride; | |||
4962 | dst = dst_line; | |||
4963 | dst_line += dst_stride; | |||
4964 | mask = mask_line; | |||
4965 | mask_line += mask_stride; | |||
4966 | ||||
4967 | w = width; | |||
4968 | ||||
4969 | while (w && (uintptr_t)dst & 15) | |||
4970 | { | |||
4971 | uint32_t sa; | |||
4972 | uint8_t m = *mask++; | |||
4973 | ||||
4974 | s = *src++; | |||
4975 | d = *dst; | |||
4976 | ||||
4977 | sa = s >> 24; | |||
4978 | ||||
4979 | if (m) | |||
4980 | { | |||
4981 | if (sa == 0xff && m == 0xff) | |||
4982 | { | |||
4983 | *dst = s; | |||
4984 | } | |||
4985 | else | |||
4986 | { | |||
4987 | __m128i ms, md, ma, msa; | |||
4988 | ||||
4989 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | |||
4990 | ms = unpack_32_1x128 (s); | |||
4991 | md = unpack_32_1x128 (d); | |||
4992 | ||||
4993 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | |||
4994 | ||||
4995 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | |||
4996 | } | |||
4997 | } | |||
4998 | ||||
4999 | dst++; | |||
5000 | w--; | |||
5001 | } | |||
5002 | ||||
5003 | while (w >= 4) | |||
5004 | { | |||
5005 | uint32_t m; | |||
5006 | memcpy(&m, mask, sizeof(uint32_t)); | |||
5007 | ||||
5008 | if (m) | |||
5009 | { | |||
5010 | xmm_src = load_128_unaligned ((__m128i*)src); | |||
5011 | ||||
5012 | if (m == 0xffffffff && is_opaque (xmm_src)) | |||
5013 | { | |||
5014 | save_128_aligned ((__m128i *)dst, xmm_src); | |||
5015 | } | |||
5016 | else | |||
5017 | { | |||
5018 | xmm_dst = load_128_aligned ((__m128i *)dst); | |||
5019 | ||||
5020 | xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); | |||
5021 | ||||
5022 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
5023 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
5024 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
5025 | ||||
5026 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); | |||
5027 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
5028 | ||||
5029 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, | |||
5030 | &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
5031 | ||||
5032 | save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
5033 | } | |||
5034 | } | |||
5035 | ||||
5036 | src += 4; | |||
5037 | dst += 4; | |||
5038 | mask += 4; | |||
5039 | w -= 4; | |||
5040 | } | |||
5041 | ||||
5042 | while (w) | |||
5043 | { | |||
5044 | uint32_t sa; | |||
5045 | uint8_t m = *mask++; | |||
5046 | ||||
5047 | s = *src++; | |||
5048 | d = *dst; | |||
5049 | ||||
5050 | sa = s >> 24; | |||
5051 | ||||
5052 | if (m) | |||
5053 | { | |||
5054 | if (sa == 0xff && m == 0xff) | |||
5055 | { | |||
5056 | *dst = s; | |||
5057 | } | |||
5058 | else | |||
5059 | { | |||
5060 | __m128i ms, md, ma, msa; | |||
5061 | ||||
5062 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | |||
5063 | ms = unpack_32_1x128 (s); | |||
5064 | md = unpack_32_1x128 (d); | |||
5065 | ||||
5066 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | |||
5067 | ||||
5068 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | |||
5069 | } | |||
5070 | } | |||
5071 | ||||
5072 | dst++; | |||
5073 | w--; | |||
5074 | } | |||
5075 | } | |||
5076 | ||||
5077 | } | |||
5078 | ||||
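/* sse2_composite_over_reverse_n_8888: OVER_REVERSE with a solid source and
 * an a8r8g8b8 destination, i.e. the existing destination is composited over
 * the constant source and the result written back. */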
5079 | static void | |||
5080 | sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp, | |||
5081 | pixman_composite_info_t *info) | |||
5082 | { | |||
5083 | PIXMAN_COMPOSITE_ARGS (info); | |||
5084 | uint32_t src; | |||
5085 | uint32_t *dst_line, *dst; | |||
5086 | __m128i xmm_src; | |||
5087 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
5088 | __m128i xmm_dsta_hi, xmm_dsta_lo; | |||
5089 | int dst_stride; | |||
5090 | int32_t w; | |||
5091 | ||||
5092 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | |||
5093 | ||||
5094 | if (src == 0) | |||
5095 | return; | |||
5096 | ||||
5097 | PIXMAN_IMAGE_GET_LINE ( | |||
5098 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
5099 | ||||
5100 | xmm_src = expand_pixel_32_1x128 (src); | |||
5101 | ||||
5102 | while (height--) | |||
5103 | { | |||
5104 | dst = dst_line; | |||
5105 | ||||
5106 | dst_line += dst_stride; | |||
5107 | w = width; | |||
5108 | ||||
5109 | while (w && (uintptr_t)dst & 15) | |||
5110 | { | |||
5111 | __m128i vd; | |||
5112 | ||||
5113 | vd = unpack_32_1x128 (*dst); | |||
5114 | ||||
5115 | *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd), | |||
5116 | xmm_src)); | |||
5117 | w--; | |||
5118 | dst++; | |||
5119 | } | |||
5120 | ||||
5121 | while (w >= 4) | |||
5122 | { | |||
5123 | __m128i tmp_lo, tmp_hi; | |||
5124 | ||||
5125 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
5126 | ||||
5127 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
5128 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi); | |||
5129 | ||||
5130 | tmp_lo = xmm_src; | |||
5131 | tmp_hi = xmm_src; | |||
5132 | ||||
5133 | over_2x128 (&xmm_dst_lo, &xmm_dst_hi, | |||
5134 | &xmm_dsta_lo, &xmm_dsta_hi, | |||
5135 | &tmp_lo, &tmp_hi); | |||
5136 | ||||
5137 | save_128_aligned ( | |||
5138 | (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi)); | |||
5139 | ||||
5140 | w -= 4; | |||
5141 | dst += 4; | |||
5142 | } | |||
5143 | ||||
5144 | while (w) | |||
5145 | { | |||
5146 | __m128i vd; | |||
5147 | ||||
5148 | vd = unpack_32_1x128 (*dst); | |||
5149 | ||||
5150 | *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd), | |||
5151 | xmm_src)); | |||
5152 | w--; | |||
5153 | dst++; | |||
5154 | } | |||
5155 | ||||
5156 | } | |||
5157 | ||||
5158 | } | |||
5159 | ||||
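/* sse2_composite_over_8888_8888_8888: OVER operator where source, mask and
 * destination are all a8r8g8b8; only the alpha channel of the mask is used
 * to modulate the source before the blend. */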
5160 | static void | |||
5161 | sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp, | |||
5162 | pixman_composite_info_t *info) | |||
5163 | { | |||
5164 | PIXMAN_COMPOSITE_ARGS (info); | |||
5165 | uint32_t *src, *src_line, s; | |||
5166 | uint32_t *dst, *dst_line, d; | |||
5167 | uint32_t *mask, *mask_line; | |||
5168 | uint32_t m; | |||
5169 | int src_stride, mask_stride, dst_stride; | |||
5170 | int32_t w; | |||
5171 | ||||
5172 | __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; | |||
5173 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
5174 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
5175 | ||||
5176 | PIXMAN_IMAGE_GET_LINE ( | |||
5177 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); | |||
5178 | PIXMAN_IMAGE_GET_LINE ( | |||
5179 | mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); | |||
5180 | PIXMAN_IMAGE_GET_LINE ( | |||
5181 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); | |||
5182 | ||||
5183 | while (height--) | |||
5184 | { | |||
5185 | src = src_line; | |||
5186 | src_line += src_stride; | |||
5187 | dst = dst_line; | |||
5188 | dst_line += dst_stride; | |||
5189 | mask = mask_line; | |||
5190 | mask_line += mask_stride; | |||
5191 | ||||
5192 | w = width; | |||
5193 | ||||
5194 | while (w && (uintptr_t)dst & 15) | |||
5195 | { | |||
5196 | uint32_t sa; | |||
5197 | ||||
5198 | s = *src++; | |||
5199 | m = (*mask++) >> 24; | |||
5200 | d = *dst; | |||
5201 | ||||
5202 | sa = s >> 24; | |||
5203 | ||||
5204 | if (m) | |||
5205 | { | |||
5206 | if (sa == 0xff && m == 0xff) | |||
5207 | { | |||
5208 | *dst = s; | |||
5209 | } | |||
5210 | else | |||
5211 | { | |||
5212 | __m128i ms, md, ma, msa; | |||
5213 | ||||
5214 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | |||
5215 | ms = unpack_32_1x128 (s); | |||
5216 | md = unpack_32_1x128 (d); | |||
5217 | ||||
5218 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | |||
5219 | ||||
5220 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | |||
5221 | } | |||
5222 | } | |||
5223 | ||||
5224 | dst++; | |||
5225 | w--; | |||
5226 | } | |||
5227 | ||||
5228 | while (w >= 4) | |||
5229 | { | |||
5230 | xmm_mask = load_128_unaligned ((__m128i*)mask); | |||
5231 | ||||
5232 | if (!is_transparent (xmm_mask)) | |||
5233 | { | |||
5234 | xmm_src = load_128_unaligned ((__m128i*)src); | |||
5235 | ||||
5236 | if (is_opaque (xmm_mask) && is_opaque (xmm_src)) | |||
5237 | { | |||
5238 | save_128_aligned ((__m128i *)dst, xmm_src); | |||
5239 | } | |||
5240 | else | |||
5241 | { | |||
5242 | xmm_dst = load_128_aligned ((__m128i *)dst); | |||
5243 | ||||
5244 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
5245 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
5246 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
5247 | ||||
5248 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); | |||
5249 | expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
5250 | ||||
5251 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, | |||
5252 | &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
5253 | ||||
5254 | save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
5255 | } | |||
5256 | } | |||
5257 | ||||
5258 | src += 4; | |||
5259 | dst += 4; | |||
5260 | mask += 4; | |||
5261 | w -= 4; | |||
5262 | } | |||
5263 | ||||
5264 | while (w) | |||
5265 | { | |||
5266 | uint32_t sa; | |||
5267 | ||||
5268 | s = *src++; | |||
5269 | m = (*mask++) >> 24; | |||
5270 | d = *dst; | |||
5271 | ||||
5272 | sa = s >> 24; | |||
5273 | ||||
5274 | if (m) | |||
5275 | { | |||
5276 | if (sa == 0xff && m == 0xff) | |||
5277 | { | |||
5278 | *dst = s; | |||
5279 | } | |||
5280 | else | |||
5281 | { | |||
5282 | __m128i ms, md, ma, msa; | |||
5283 | ||||
5284 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | |||
5285 | ms = unpack_32_1x128 (s); | |||
5286 | md = unpack_32_1x128 (d); | |||
5287 | ||||
5288 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | |||
5289 | ||||
5290 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | |||
5291 | } | |||
5292 | } | |||
5293 | ||||
5294 | dst++; | |||
5295 | w--; | |||
5296 | } | |||
5297 | } | |||
5298 | ||||
5299 | } | |||
5300 | ||||
5301 | /* A variant of 'sse2_combine_over_u' with minor tweaks */ | |||
5302 | static force_inline void | |||
5303 | scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, | |||
5304 | const uint32_t* ps, | |||
5305 | int32_t w, | |||
5306 | pixman_fixed_t vx, | |||
5307 | pixman_fixed_t unit_x, | |||
5308 | pixman_fixed_t src_width_fixed, | |||
5309 | pixman_bool_t fully_transparent_src) | |||
5310 | { | |||
5311 | uint32_t s, d; | |||
5312 | const uint32_t* pm = NULL; | |||
5313 | ||||
5314 | __m128i xmm_dst_lo, xmm_dst_hi; | |||
5315 | __m128i xmm_src_lo, xmm_src_hi; | |||
5316 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
5317 | ||||
5318 | if (fully_transparent_src) | |||
5319 | return; | |||
5320 | ||||
5321 | /* Align dst on a 16-byte boundary */ | |||
5322 | while (w && ((uintptr_t)pd & 15)) | |||
5323 | { | |||
5324 | d = *pd; | |||
5325 | s = combine1 (ps + pixman_fixed_to_int (vx), pm); | |||
5326 | vx += unit_x; | |||
5327 | while (vx >= 0) | |||
5328 | vx -= src_width_fixed; | |||
5329 | ||||
5330 | *pd++ = core_combine_over_u_pixel_sse2 (s, d); | |||
5331 | if (pm) | |||
5332 | pm++; | |||
5333 | w--; | |||
5334 | } | |||
5335 | ||||
5336 | while (w >= 4) | |||
5337 | { | |||
5338 | __m128i tmp; | |||
5339 | uint32_t tmp1, tmp2, tmp3, tmp4; | |||
5340 | ||||
5341 | tmp1 = *(ps + pixman_fixed_to_int (vx)); | |||
5342 | vx += unit_x; | |||
5343 | while (vx >= 0) | |||
5344 | vx -= src_width_fixed; | |||
5345 | tmp2 = *(ps + pixman_fixed_to_int (vx)); | |||
5346 | vx += unit_x; | |||
5347 | while (vx >= 0) | |||
5348 | vx -= src_width_fixed; | |||
5349 | tmp3 = *(ps + pixman_fixed_to_int (vx)); | |||
5350 | vx += unit_x; | |||
5351 | while (vx >= 0) | |||
5352 | vx -= src_width_fixed; | |||
5353 | tmp4 = *(ps + pixman_fixed_to_int (vx)); | |||
5354 | vx += unit_x; | |||
5355 | while (vx >= 0) | |||
5356 | vx -= src_width_fixed; | |||
5357 | ||||
5358 | tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); | |||
5359 | ||||
5360 | xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm); | |||
5361 | ||||
5362 | if (is_opaque (xmm_src_hi)) | |||
5363 | { | |||
5364 | save_128_aligned ((__m128i*)pd, xmm_src_hi); | |||
5365 | } | |||
5366 | else if (!is_zero (xmm_src_hi)) | |||
5367 | { | |||
5368 | xmm_dst_hi = load_128_aligned ((__m128i*) pd); | |||
5369 | ||||
5370 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | |||
5371 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
5372 | ||||
5373 | expand_alpha_2x128 ( | |||
5374 | xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); | |||
5375 | ||||
5376 | over_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
5377 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
5378 | &xmm_dst_lo, &xmm_dst_hi); | |||
5379 | ||||
5380 | /* rebuild the 4 pixel data and save */ | |||
            save_128_aligned ((__m128i*)pd,
                              pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
        }

        w -= 4;
        pd += 4;
        if (pm)
            pm += 4;
    }

    while (w)
    {
        d = *pd;
        s = combine1 (ps + pixman_fixed_to_int (vx), pm);
        vx += unit_x;
        while (vx >= 0)
            vx -= src_width_fixed;

        *pd++ = core_combine_over_u_pixel_sse2 (s, d);
        if (pm)
            pm++;

        w--;
    }
}
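
/*
 * A minimal scalar sketch of the 16.16 fixed-point stepping used above,
 * assuming the convention visible in the expanded main loop: 'ps' is
 * pre-advanced by one source width and 'vx' stays in the negative range
 * [-src_width_fixed, 0), so (vx >> 16) is a negative index back into the
 * scanline; the wrap loop implements NORMAL (tiled) repeat.  Names ending
 * in '_demo' are illustrative only and are not part of pixman.
 */
#include <stdint.h>

#define fixed_to_int_demo(f) ((int) ((f) >> 16))   /* 16.16 -> integer part */

static void
nearest_scanline_demo (uint32_t       *pd,
                       const uint32_t *ps,      /* one width past line start */
                       int32_t         w,
                       int32_t         vx,      /* in [-src_width_fixed, 0)  */
                       int32_t         unit_x,
                       int32_t         src_width_fixed)
{
    while (w--)
    {
        *pd++ = *(ps + fixed_to_int_demo (vx));  /* nearest source pixel */
        vx += unit_x;
        while (vx >= 0)                          /* wrap into the tile   */
            vx -= src_width_fixed;
    }
}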

FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
                       scaled_nearest_scanline_sse2_8888_8888_OVER,
                       uint32_t, uint32_t, COVER)
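/* Each FAST_NEAREST_MAINLOOP invocation expands into an always-inline
 * wrapper around the scanline function above plus a
 * fast_composite_scaled_nearest_sse2_8888_8888_<repeat>_OVER entry point
 * that walks the destination scanlines, steps vx/vy in 16.16 fixed point,
 * and specializes the repeat-mode handling (COVER/NONE/PAD/NORMAL) at
 * compile time. */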
FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
                       scaled_nearest_scanline_sse2_8888_8888_OVER,
                       uint32_t, uint32_t, NONE)
FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
                       scaled_nearest_scanline_sse2_8888_8888_OVER,
                       uint32_t, uint32_t, PAD)
5416 | FAST_NEAREST_MAINLOOP (sse2_8888_8888_normal_OVER, | |||
5417 |                        scaled_nearest_scanline_sse2_8888_8888_OVER, | |||
5418 |                        uint32_t, uint32_t, NORMAL) | |||
5419 | ||||
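The analyzer's inline macro expansions (elided above) show that each FAST_NEAREST_MAINLOOP instantiation emits a fast_composite_scaled_nearest_* driver: it maps the destination origin through the image transform to a 16.16 fixed-point source coordinate, walks destination rows, applies the repeat mode to the source row index, and hands each scanline to the worker named in the second macro argument. A minimal self-contained model of that per-row repeat handling (repeat_mode_t and model_source_row are illustrative names, not pixman API):

    /* Sketch of the repeat logic visible in the expansions: PAD clamps the
     * row index, NORMAL wraps it, and NONE flags out-of-bounds rows so the
     * caller can composite against a transparent source instead. */
    typedef enum { REPEAT_NONE, REPEAT_PAD, REPEAT_NORMAL } repeat_mode_t;

    static int
    model_source_row (repeat_mode_t mode, int y, int height, int *in_bounds)
    {
        *in_bounds = 1;

        switch (mode)
        {
        case REPEAT_PAD:
            return y < 0 ? 0 : (y >= height ? height - 1 : y);

        case REPEAT_NORMAL:
            y %= height;                    /* C truncates toward zero... */
            return y < 0 ? y + height : y;  /* ...so fix up negatives    */

        case REPEAT_NONE:
        default:
            if (y < 0 || y >= height)
                *in_bounds = 0;             /* row contributes nothing   */
            return y;
        }
    }

In the generated drivers this logic is specialized at expansion time by the repeat-mode argument, which is why the residue above is full of constant comparisons such as PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD that the compiler folds away.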
5420 | static force_inline void | |||
5421 | scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask, | |||
5422 | uint32_t * dst, | |||
5423 | const uint32_t * src, | |||
5424 | int32_t w, | |||
5425 | pixman_fixed_t vx, | |||
5426 | pixman_fixed_t unit_x, | |||
5427 | pixman_fixed_t src_width_fixed, | |||
5428 | pixman_bool_t zero_src) | |||
5429 | { | |||
5430 | __m128i xmm_mask; | |||
5431 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | |||
5432 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
5433 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
5434 | ||||
5435 | if (zero_src || (*mask >> 24) == 0) | |||
5436 | return; | |||
5437 | ||||
5438 | xmm_mask = create_mask_16_128 (*mask >> 24); | |||
5439 | ||||
5440 | while (w && (uintptr_t)dst & 15) | |||
5441 | { | |||
5442 | uint32_t s = *(src + pixman_fixed_to_int (vx)); | |||
5443 | vx += unit_x; | |||
5444 | while (vx >= 0) | |||
5445 | vx -= src_width_fixed; | |||
5446 | ||||
5447 | if (s) | |||
5448 | { | |||
5449 | uint32_t d = *dst; | |||
5450 | ||||
5451 | __m128i ms = unpack_32_1x128 (s); | |||
5452 | __m128i alpha = expand_alpha_1x128 (ms); | |||
5453 | __m128i dest = xmm_mask; | |||
5454 | __m128i alpha_dst = unpack_32_1x128 (d); | |||
5455 | ||||
5456 | *dst = pack_1x128_32 ( | |||
5457 | in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); | |||
5458 | } | |||
5459 | dst++; | |||
5460 | w--; | |||
5461 | } | |||
5462 | ||||
5463 | while (w >= 4) | |||
5464 | { | |||
5465 | uint32_t tmp1, tmp2, tmp3, tmp4; | |||
5466 | ||||
5467 | tmp1 = *(src + pixman_fixed_to_int (vx)); | |||
5468 | vx += unit_x; | |||
5469 | while (vx >= 0) | |||
5470 | vx -= src_width_fixed; | |||
5471 | tmp2 = *(src + pixman_fixed_to_int (vx)); | |||
5472 | vx += unit_x; | |||
5473 | while (vx >= 0) | |||
5474 | vx -= src_width_fixed; | |||
5475 | tmp3 = *(src + pixman_fixed_to_int (vx)); | |||
5476 | vx += unit_x; | |||
5477 | while (vx >= 0) | |||
5478 | vx -= src_width_fixed; | |||
5479 | tmp4 = *(src + pixman_fixed_to_int (vx)); | |||
5480 | vx += unit_x; | |||
5481 | while (vx >= 0) | |||
5482 | vx -= src_width_fixed; | |||
5483 | ||||
5484 | xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); | |||
5485 | ||||
5486 | if (!is_zero (xmm_src)) | |||
5487 | { | |||
5488 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
5489 | ||||
5490 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
5491 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
5492 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
5493 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
5494 | ||||
5495 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
5496 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
5497 | &xmm_mask, &xmm_mask, | |||
5498 | &xmm_dst_lo, &xmm_dst_hi); | |||
5499 | ||||
5500 | save_128_aligned ( | |||
5501 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
5502 | } | |||
5503 | ||||
5504 | dst += 4; | |||
5505 | w -= 4; | |||
5506 | } | |||
5507 | ||||
5508 | while (w) | |||
5509 | { | |||
5510 | uint32_t s = *(src + pixman_fixed_to_int (vx)); | |||
5511 | vx += unit_x; | |||
5512 | while (vx >= 0) | |||
5513 | vx -= src_width_fixed; | |||
5514 | ||||
5515 | if (s) | |||
5516 | { | |||
5517 | uint32_t d = *dst; | |||
5518 | ||||
5519 | __m128i ms = unpack_32_1x128 (s); | |||
5520 | __m128i alpha = expand_alpha_1x128 (ms); | |||
5521 | __m128i mask = xmm_mask; | |||
5522 | __m128i dest = unpack_32_1x128 (d); | |||
5523 | ||||
5524 | *dst = pack_1x128_32 ( | |||
5525 | in_over_1x128 (&ms, &alpha, &mask, &dest)); | |||
5526 | } | |||
5527 | ||||
5528 | dst++; | |||
5529 | w--; | |||
5530 | } | |||
5531 | ||||
5532 | } | |||
5533 | ||||
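One addressing detail of the worker above is worth spelling out before the instantiations below: the generated drivers call it with src already advanced to one past the end of the source row and a negative vx, so pixman_fixed_to_int (an arithmetic right shift by 16 of the 16.16 fixed-point coordinate) yields a negative offset in [-width, -1], and the wrap loop keeps it there. A minimal scalar model, assuming 16.16 fixed point as in pixman.h (fixed_to_int and fetch_and_step are illustrative names):

    #include <stdint.h>

    typedef int32_t pixman_fixed_t;        /* 16.16 fixed point */

    static inline int
    fixed_to_int (pixman_fixed_t v)        /* arithmetic shift == floor */
    {
        return (int) (v >> 16);
    }

    /* One nearest-neighbour fetch plus step: src_end points one past the
     * last pixel of the row, vx stays negative, and crossing zero wraps
     * it back by the row width (src_width_fixed = width << 16). */
    static uint32_t
    fetch_and_step (const uint32_t *src_end, pixman_fixed_t *vx,
                    pixman_fixed_t unit_x, pixman_fixed_t src_width_fixed)
    {
        uint32_t s = *(src_end + fixed_to_int (*vx));

        *vx += unit_x;
        while (*vx >= 0)
            *vx -= src_width_fixed;

        return s;
    }

Presumably the negative-index convention is chosen so that NORMAL repeat reduces to the simple subtract above, keeping division and modulo out of the inner loop.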
5534 | FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, | |||
5535 |                               scaled_nearest_scanline_sse2_8888_n_8888_OVER, | |||
5536 |                               uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE) | |||
5537 | FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, | |||
5538 |                               scaled_nearest_scanline_sse2_8888_n_8888_OVER, | |||
5539 |                               uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE) | |||
5540 | FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, | |||
5541 |                               scaled_nearest_scanline_sse2_8888_n_8888_OVER, | |||
5542 |                               uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) | |||
(PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | |||
5543 | FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1 ] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1] ; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { max_vy = ((pixman_fixed_t) ((uint32_t) (src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) 
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | |||
5544 | scaled_nearest_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1 ] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1] ; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { max_vy = ((pixman_fixed_t) ((uint32_t) (src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) repeat 
(PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | |||
5545 | uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)static void fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1 ] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1] ; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { max_vy = ((pixman_fixed_t) ((uint32_t) (src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) repeat 
(PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | |||
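The two FAST_NEAREST_MAINLOOP_COMMON instantiations above expand to complete composite functions (fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER and ..._normal_OVER, visible in the analyzer's expansions) that walk destination scanlines, step a 16.16 fixed-point source coordinate, handle the NONE/NORMAL repeat edge cases, and delegate each span to scaled_nearest_scanline_sse2_8888_n_8888_OVER. A minimal sketch of the per-span stepping they drive (editor's illustration only; the real worker also applies the solid mask and OVER blending):

#include <stdint.h>

/* editor's sketch, not pixman API: nearest-neighbour source stepping
 * in 16.16 fixed point */
static void
nearest_span_sketch (uint32_t *dst, const uint32_t *src,
                     int32_t w, int32_t vx, int32_t unit_x)
{
    while (w--)
    {
        *dst++ = src[vx >> 16]; /* integer part picks the source pixel */
        vx += unit_x;           /* advance by the fixed-point step     */
    }
}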
5546 |
5547 | #if PSHUFD_IS_FAST
5548 |
5549 | /***********************************************************************************/
5550 |
5551 | # define BILINEAR_DECLARE_VARIABLES                                           \
5552 |     const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);   \
5553 |     const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);   \
5554 |     const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1);         \
5555 |     const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
5556 |                                            unit_x, -unit_x, unit_x, -unit_x); \
5557 |     const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4,          \
5558 |                                            unit_x * 4, -unit_x * 4,          \
5559 |                                            unit_x * 4, -unit_x * 4,          \
5560 |                                            unit_x * 4, -unit_x * 4);         \
5561 |     const __m128i xmm_zero = _mm_setzero_si128 ();                            \
5562 |     __m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3,   \
5563 |                                    vx + unit_x * 2, -(vx + 1) - unit_x * 2,   \
5564 |                                    vx + unit_x * 1, -(vx + 1) - unit_x * 1,   \
5565 |                                    vx + unit_x * 0, -(vx + 1) - unit_x * 0);  \
5566 |     __m128i xmm_wh_state;
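The interleaved vx / -(vx + 1) lane layout of xmm_x is what lets the helpers below derive both horizontal weights with one shift and one add: in two's complement -(v + 1) == ~v, so a logical right shift of the pair yields frac and its bitwise complement, and the alternating 1 in xmm_addc turns the complement into 128 - frac. A small self-check of that identity (editor's illustration, not pixman code; assumes the 7-bit BILINEAR_INTERPOLATION_BITS used in this build):

#include <assert.h>
#include <stdint.h>

static void
check_weight_pair (int32_t vx)
{
    uint16_t lane_pos = (uint16_t) vx;          /* lane that holds  vx       */
    uint16_t lane_neg = (uint16_t) (-(vx + 1)); /* lane that holds -(vx + 1) */
    unsigned wr = lane_pos >> (16 - 7);         /* right weight: frac        */
    unsigned wl = (lane_neg >> (16 - 7)) + 1;   /* xmm_addc supplies the + 1 */
    assert (wr + wl == 1 << 7);                 /* weights always sum to 128 */
}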
5567 |
5568 | #define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_)                    \
5569 | do {                                                                          \
5570 |     int phase = phase_;                                                       \
5571 |     __m128i xmm_wh, xmm_a, xmm_b;                                             \
5572 |     /* fetch 2x2 pixel block into sse2 registers */                           \
5573 |     __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);           \
5574 |     __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);        \
5575 |     vx += unit_x;                                                             \
5576 |     /* vertical interpolation */                                              \
5577 |     xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);     \
5578 |     xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);     \
5579 |     xmm_a = _mm_add_epi16 (xmm_a, xmm_b);                                     \
5580 |     /* calculate horizontal weights */                                        \
5581 |     if (phase <= 0)                                                           \
5582 |     {                                                                         \
5583 |         xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x,        \
5584 |                                       16 - BILINEAR_INTERPOLATION_BITS));     \
5585 |         xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4);       \
5586 |         phase = 0;                                                            \
5587 |     }                                                                         \
5588 |     xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase,      \
5589 |                                                            phase, phase));    \
5590 |     /* horizontal interpolation */                                            \
5591 |     xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 (           \
5592 |         xmm_a, _MM_SHUFFLE (1, 0, 3, 2)), xmm_a), xmm_wh);                    \
5593 |     /* shift the result */                                                    \
5594 |     pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2);            \
5595 | } while (0)
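In this PSHUFD_IS_FAST variant the weight vector for four consecutive output pixels is computed once, when phase reaches 0, and cached in xmm_wh_state; phases 1-3 only re-broadcast the matching 32-bit weight pair with _mm_shuffle_epi32, and phase_ == -1 selects the standalone single-pixel mode used by BILINEAR_INTERPOLATE_ONE_PIXEL. A sketch of the broadcast step in isolation (editor's illustration; the macro relies on phase being a compile-time constant after expansion, which the switch below emulates since _mm_shuffle_epi32 requires an immediate):

#include <emmintrin.h>

static __m128i
broadcast_weights (__m128i wh_state, int phase)
{
    switch (phase & 3)
    {
    case 0:  return _mm_shuffle_epi32 (wh_state, _MM_SHUFFLE (0, 0, 0, 0));
    case 1:  return _mm_shuffle_epi32 (wh_state, _MM_SHUFFLE (1, 1, 1, 1));
    case 2:  return _mm_shuffle_epi32 (wh_state, _MM_SHUFFLE (2, 2, 2, 2));
    default: return _mm_shuffle_epi32 (wh_state, _MM_SHUFFLE (3, 3, 3, 3));
    }
}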
5596 |
5597 | #else /************************************************************************/
5598 |
5599 | # define BILINEAR_DECLARE_VARIABLES                                           \
5600 |     const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);   \
5601 |     const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);   \
5602 |     const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1);         \
5603 |     const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
5604 |                                            unit_x, -unit_x, unit_x, -unit_x); \
5605 |     const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4,          \
5606 |                                            unit_x * 4, -unit_x * 4,          \
5607 |                                            unit_x * 4, -unit_x * 4,          \
5608 |                                            unit_x * 4, -unit_x * 4);         \
5609 |     const __m128i xmm_zero = _mm_setzero_si128 ();                            \
5610 |     __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1),              \
5611 |                                    vx, -(vx + 1), vx, -(vx + 1))
5612 |
5613 | #define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase)                     \
5614 | do {                                                                          \
5615 |     __m128i xmm_wh, xmm_a, xmm_b;                                             \
5616 |     /* fetch 2x2 pixel block into sse2 registers */                           \
5617 |     __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);           \
5618 |     __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);        \
5619 |     (void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */          \
5620 |     vx += unit_x;                                                             \
5621 |     /* vertical interpolation */                                              \
5622 |     xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);     \
5623 |     xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);     \
5624 |     xmm_a = _mm_add_epi16 (xmm_a, xmm_b);                                     \
5625 |     /* calculate horizontal weights */                                        \
5626 |     xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x,                  \
5627 |                             16 - BILINEAR_INTERPOLATION_BITS));                \
5628 |     xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);                                   \
5629 |     /* horizontal interpolation */                                            \
5630 |     xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a);   \
5631 |     xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh);       \
5632 |     /* shift the result */                                                    \
5633 |     pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2);            \
5634 | } while (0)
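Both helper variants compute the same arithmetic: a vertical lerp of the 2x2 block with wt/wb, a horizontal lerp with the per-pixel weights, then one shift by 2 * BILINEAR_INTERPOLATION_BITS because each weight pair sums to 128. A scalar per-channel reference of that arithmetic (editor's sketch under the same 7-bit weight assumption):

#include <stdint.h>

/* tl/tr/bl/br are 8-bit channel values; wt + wb == 128, wl + wr == 128 */
static uint32_t
bilinear_channel_sketch (uint32_t tl, uint32_t tr, uint32_t bl, uint32_t br,
                         uint32_t wt, uint32_t wb, uint32_t wl, uint32_t wr)
{
    uint32_t left  = tl * wt + bl * wb; /* vertical pass, fits in 16 bits */
    uint32_t right = tr * wt + br * wb;
    return (left * wl + right * wr) >> (7 * 2); /* undo both 7-bit scales */
}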
5635 |
5636 | /***********************************************************************************/
5637 |
5638 | #endif
5639 |
5640 | #define BILINEAR_INTERPOLATE_ONE_PIXEL(pix)                                   \
5641 | do {                                                                          \
5642 |     __m128i xmm_pix;                                                          \
5643 |     BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1);                      \
5644 |     xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix);                             \
5645 |     xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix);                            \
5646 |     pix = _mm_cvtsi128_si32 (xmm_pix);                                        \
5647 | } while(0)
5648 |
5649 | #define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix)                                 \
5650 | do {                                                                          \
5651 |     __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4;                           \
5652 |     BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0);                      \
5653 |     BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1);                      \
5654 |     BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2);                      \
5655 |     BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3);                      \
5656 |     xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2);                          \
5657 |     xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4);                          \
5658 |     pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3);                              \
5659 | } while(0)
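The tail of BILINEAR_INTERPOLATE_FOUR_PIXELS narrows the four results (each pixel still spread across four 32-bit channel values, all at most 255) back to packed 8888 pixels in two saturating steps; the same pack sequence in isolation (this mirrors the macro's own calls):

#include <emmintrin.h>

static __m128i
pack_four_pixels (__m128i p1, __m128i p2, __m128i p3, __m128i p4)
{
    __m128i lo = _mm_packs_epi32 (p1, p2); /* 32 -> 16 bit, signed saturation  */
    __m128i hi = _mm_packs_epi32 (p3, p4);
    return _mm_packus_epi16 (lo, hi);      /* 16 -> 8 bit, unsigned saturation */
}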
5660 |
5661 | #define BILINEAR_SKIP_ONE_PIXEL()                                             \
5662 | do {                                                                          \
5663 |     vx += unit_x;                                                             \
5664 |     xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);                                   \
5665 | } while(0)
5666 |
5667 | #define BILINEAR_SKIP_FOUR_PIXELS()                                           \
5668 | do {                                                                          \
5669 |     vx += unit_x * 4;                                                         \
5670 |     xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4);                                   \
5671 | } while(0)
5672 |
5673 | /***********************************************************************************/
5674 |
5675 | static force_inline void
5676 | scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t *       dst,
5677 |                                              const uint32_t * mask,
5678 |                                              const uint32_t * src_top,
5679 |                                              const uint32_t * src_bottom,
5680 |                                              int32_t          w,
5681 |                                              int              wt,
5682 |                                              int              wb,
5683 |                                              pixman_fixed_t   vx_,
5684 |                                              pixman_fixed_t   unit_x_,
5685 |                                              pixman_fixed_t   max_vx,
5686 |                                              pixman_bool_t    zero_src)
5687 | {
5688 |     intptr_t vx = vx_;
5689 |     intptr_t unit_x = unit_x_;
5690 |     BILINEAR_DECLARE_VARIABLES;
5691 |     uint32_t pix1, pix2;
5692 |
5693 |     while (w && ((uintptr_t)dst & 15))
5694 |     {
5695 |         BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
5696 |         *dst++ = pix1;
5697 |         w--;
5698 |     }
5699 |
5700 |     while ((w -= 4) >= 0) {
5701 |         __m128i xmm_src;
5702 |         BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
5703 |         _mm_store_si128 ((__m128i *)dst, xmm_src);
5704 |         dst += 4;
5705 |     }
5706 |
5707 |     if (w & 2)
5708 |     {
5709 |         BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
5710 |         BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
5711 |         *dst++ = pix1;
5712 |         *dst++ = pix2;
5713 |     }
5714 |
5715 |     if (w & 1)
5716 |     {
5717 |         BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
5718 |         *dst = pix1;
5719 |     }
5720 |
5721 | }
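The scanline above follows the usual SSE2 store-alignment split: a scalar head until dst reaches a 16-byte boundary, a four-pixel body using the aligned _mm_store_si128, then a scalar tail. Note that when the body loop exits, w is negative (remaining count minus 4), but in two's complement its low two bits still equal the remaining pixel count, which is why the w & 2 / w & 1 tests are correct. The same head/body/tail pattern on a trivial fill (editor's sketch, not pixman code):

#include <emmintrin.h>
#include <stdint.h>

static void
fill_sketch (uint32_t *dst, int32_t w, uint32_t value)
{
    const __m128i v4 = _mm_set1_epi32 ((int32_t) value);

    while (w && ((uintptr_t) dst & 15)) /* head: align the store address */
    {
        *dst++ = value;
        w--;
    }
    while ((w -= 4) >= 0)               /* body: aligned 16-byte stores */
    {
        _mm_store_si128 ((__m128i *) dst, v4);
        dst += 4;
    }
    if (w & 2) { *dst++ = value; *dst++ = value; } /* low bits of the    */
    if (w & 1) { *dst = value; }                   /* negative w = count */
}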
5722 |
5723 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
5724 | scaled_bilinear_scanline_sse2_8888_8888_SRC,
5725 | uint32_t, uint32_t, uint32_t,
5726 | COVER, FLAG_NONE)
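Both instantiations carry a PIXMAN_REPEAT_NORMAL branch that tiles a very narrow source line (bits.width < 64) into a 128-entry stack buffer, so the scanline loop crosses the wrap seam far less often; the 64*2 bound holds because the growth loop stops the first time src_width reaches 64 and each step adds less than 64. A standalone sketch of that extension step under assumed toy values (all names local to the sketch):

#include <stdint.h>
#include <stdio.h>

int main (void)
{
    uint32_t src_line[3] = { 0xff0000ff, 0xff00ff00, 0xffff0000 };
    int      width = 3;            /* a tiny source, the width < 64 case */
    int      max_x = 200;          /* rightmost source pixel the scan needs */
    uint32_t extended[64 * 2];     /* bound is safe: loop below stops once
                                      src_width >= 64, each step adds < 64 */
    int      src_width = 0;
    int      i, j;

    while (src_width < 64 && src_width <= max_x)
        src_width += width;        /* grow in whole-image steps */

    for (i = 0; i < src_width;)    /* tile the line into the buffer */
        for (j = 0; j < width; j++, i++)
            extended[i] = src_line[j];

    /* the pixel just past one tile repeats the first source pixel */
    printf ("src_width=%d extended[0]=%08x extended[width]=%08x\n",
            src_width, (unsigned) extended[0], (unsigned) extended[width]);
    return 0;
}

Here src_width lands on 66 (the first multiple of 3 at or above 64), and extended[width] equals extended[0], which is exactly the wrap-around the inner loop relies on.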
5727 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
static void
fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC (pixman_implementation_t *imp,
                                                       pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t op = info->op;
    __attribute__((unused)) pixman_image_t *src_image = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t src_x = info->src_x;
    __attribute__((unused)) int32_t src_y = info->src_y;
    __attribute__((unused)) int32_t mask_x = info->mask_x;
    __attribute__((unused)) int32_t mask_y = info->mask_y;
    __attribute__((unused)) int32_t dest_x = info->dest_x;
    __attribute__((unused)) int32_t dest_y = info->dest_y;
    __attribute__((unused)) int32_t width = info->width;
    __attribute__((unused)) int32_t height = info->height;
    uint32_t *dst_line;
    uint32_t *mask_line;
    uint32_t *src_first_line;
    int y1, y2;
    pixman_fixed_t max_vx = (2147483647);
    pixman_vector_t v;
    pixman_fixed_t vx, vy;
    pixman_fixed_t unit_x, unit_y;
    int32_t left_pad, left_tz, right_tz, right_pad;
    uint32_t *dst;
    uint32_t solid_mask;
    const uint32_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    int src_width;
    pixman_fixed_t src_width_fixed;
    int max_x;
    pixman_bool_t need_src_extension;
    do
    {
        uint32_t *__bits__;
        int __stride__;
        __bits__ = dest_image->bits.bits;
        __stride__ = dest_image->bits.rowstride;
        (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
    } while (0);
    if ((0) & (1 << 1))
    {
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        mask_stride = 0;
    }
    else if ((0) & (1 << 2))
    {
        do
        {
            uint32_t *__bits__;
            int __stride__;
            __bits__ = mask_image->bits.bits;
            __stride__ = mask_image->bits.rowstride;
            (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
            (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
        } while (0);
    }
    do
    {
        uint32_t *__bits__;
        int __stride__;
        __bits__ = src_image->bits.bits;
        __stride__ = src_image->bits.rowstride;
        (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
    } while (0);
    v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));
    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;
    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    vy = v.vector[1];
    if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE)
    {
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad, &left_tz, &width, &right_tz, &right_pad);
        if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD)
        {
            left_pad += left_tz;
            right_pad += right_tz;
            left_tz = right_tz = 0;
        }
        v.vector[0] += left_pad * unit_x;
    }
    if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
    {
        vx = v.vector[0];
        repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16)));
        max_x = ((int) ((vx + (width - 1) * (int64_t) unit_x) >> 16)) + 1;
        if (src_image->bits.width < 64)
        {
            src_width = 0;
            while (src_width < 64 && src_width <= max_x)
                src_width += src_image->bits.width;
            need_src_extension = 1;
        }
        else
        {
            src_width = src_image->bits.width;
            need_src_extension = 0;
        }
        src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width) << 16));
    }
    while (--height >= 0)
    {
        int weight1, weight2;
        dst = dst_line;
        dst_line += dst_stride;
        vx = v.vector[0];
        if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; }
        y1 = ((int) ((vy) >> 16));
        weight2 = pixman_fixed_to_bilinear_weight (vy);
        if (weight2)
        {
            y2 = y1 + 1;
            weight1 = (1 << 7) - weight2;
        }
        else
        {
            y2 = y1;
            weight1 = weight2 = (1 << 7) / 2;
        }
        vy += unit_y;
        if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];
            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = src1[0];
                buf2[0] = buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
            }
        }
        else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];
            if (y1 < 0) { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0) { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (left_tz > 0)
            {
                buf1[0] = 0;
                buf1[1] = src1[0];
                buf2[0] = 0;
                buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                dst += left_tz;
                if ((0) & (1 << 2)) mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0)
            {
                buf1[0] = src1[src_image->bits.width - 1];
                buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1];
                buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                dst += right_tz;
                if ((0) & (1 << 2)) mask += right_tz;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
            }
        }
        else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
        {
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t *src_line_top;
            uint32_t *src_line_bottom;
            uint32_t buf1[2], buf2[2];
            uint32_t extended_src_line0[64*2];
            uint32_t extended_src_line1[64*2];
            int i, j;
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;
            if (need_src_extension)
            {
                for (i = 0; i < src_width;)
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                    {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }
            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;
            while (width_remain > 0)
            {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if (((int) ((vx) >> 16)) == src_width - 1)
                {
                    num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0)
                {
                    num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                }
            }
        }
        else
        {
            scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
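Because the repeat-mode comparisons above are compile-time constants, only the PIXMAN_REPEAT_PAD branch survives in this instantiation, and repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height) simply clamps the row index to the nearest edge so src1 and src2 always point at a valid scanline. A standalone sketch of that clamping behaviour; clamp_pad is a local stand-in for pixman's repeat helper:

#include <stdio.h>

static void clamp_pad (int *c, int size)
{
    /* PAD repeat: out-of-range coordinates stick to the edge pixel */
    if (*c < 0)
        *c = 0;
    else if (*c >= size)
        *c = size - 1;
}

int main (void)
{
    int height = 10;
    int rows[4] = { -3, 0, 9, 12 };
    int i;

    for (i = 0; i < 4; i++)
    {
        int y = rows[i];
        clamp_pad (&y, height);
        printf ("%3d -> %d\n", rows[i], y);
    }
    return 0;
}

Rows -3 and 12 map to 0 and 9 respectively, which is why the PAD path above never indexes outside the source image even when bilinear sampling asks for the row just past either edge.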
5728 | scaled_bilinear_scanline_sse2_8888_8888_SRC,
5729 | uint32_t, uint32_t, uint32_t,
5730 | PAD, FLAG_NONE)
src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
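The three FAST_BILINEAR_MAINLOOP_COMMON instantiations in this listing differ only in the repeat-mode constant baked into the expansion, so every PIXMAN_REPEAT_x == PIXMAN_REPEAT_y test above compares two compile-time constants and the optimizer discards the dead branches. A minimal sketch of that instantiation pattern; MAKE_MAINLOOP and the printf bodies are hypothetical stand-ins, not pixman's macro:

#include <stdio.h>

enum repeat_mode { REPEAT_NONE, REPEAT_PAD, REPEAT_NORMAL };

/* One body, stamped out once per repeat mode; the (mode) == ... tests
 * are constant after substitution, so each instance keeps one branch. */
#define MAKE_MAINLOOP(name, mode)                                      \
static void fast_composite_##name (int width)                          \
{                                                                      \
    if ((mode) == REPEAT_PAD)                                          \
        printf (#name ": replicate edge pixels, %d px\n", width);      \
    else if ((mode) == REPEAT_NONE)                                    \
        printf (#name ": zero-fill outside pixels, %d px\n", width);   \
    else                                                               \
        printf (#name ": tile the source, %d px\n", width);            \
}

MAKE_MAINLOOP (pad_SRC,    REPEAT_PAD)
MAKE_MAINLOOP (none_SRC,   REPEAT_NONE)
MAKE_MAINLOOP (normal_SRC, REPEAT_NORMAL)

int main (void)
{
    fast_composite_pad_SRC (16);
    fast_composite_none_SRC (16);
    fast_composite_normal_SRC (16);
    return 0;
}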
5731 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,

/* The report's expansion of this invocation is identical to the pad_SRC
 * expansion shown above, except that the generated function is named
 * fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC and every
 * PIXMAN_REPEAT_PAD repeat-mode constant is PIXMAN_REPEAT_NONE instead. */
5732 | scaled_bilinear_scanline_sse2_8888_8888_SRC,
5733 | uint32_t, uint32_t, uint32_t,
5734 | NONE, FLAG_NONE)
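Each scanline pass in the expanded main loop splits the 16.16 fixed-point vy into an integer scanline pair y1/y2 and two weights that sum to 1 << 7 (note weight1 = (1 << 7) - weight2 above). A minimal sketch of that weight computation, assuming pixman's convention of 7 bilinear interpolation bits; fixed_to_bilinear_weight is a stand-in, not the library helper:

#include <stdint.h>
#include <stdio.h>

#define INTERP_BITS 7

/* Top INTERP_BITS bits of the fractional part of a 16.16 value. */
static int fixed_to_bilinear_weight (int32_t t)
{
    return (t >> (16 - INTERP_BITS)) & ((1 << INTERP_BITS) - 1);
}

int main (void)
{
    int32_t vy = (5 << 16) + (1 << 15);          /* y = 5.5 in 16.16 */
    int y1 = vy >> 16;                           /* top scanline: 5  */
    int weight2 = fixed_to_bilinear_weight (vy); /* 64               */
    int weight1 = (1 << INTERP_BITS) - weight2;  /* 64               */
    printf ("y1=%d y2=%d w1=%d w2=%d\n", y1, y1 + 1, weight1, weight2);
    return 0;
}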
5735 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,

/* The report's expansion of this invocation is likewise identical to the
 * pad_SRC expansion shown above, except that the generated function is
 * named fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC and every
 * PIXMAN_REPEAT_PAD repeat-mode constant is PIXMAN_REPEAT_NORMAL instead. */
5736 | scaled_bilinear_scanline_sse2_8888_8888_SRC,
5737 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t ) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16))); max_x = ((int) ((vx + 
(width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride ; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { 
int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
5738 | NORMAL, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t ) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * 
(int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride ; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; 
int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
5739 | ||||
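/* Editor's note: the NORMAL-repeat walk in the expansion above is the densest
 * part of the generated main loop. Below is a minimal standalone sketch of the
 * same span-splitting idea, under the assumption vx >= 0; the names
 * (split_row, process_span, FIXED_1) are illustrative, not pixman API. */
#include <stdint.h>
#include <stdio.h>

typedef int32_t fixed_16_16;              /* pixman_fixed_t: 16.16 fixed point */
#define FIXED_1 ((fixed_16_16) 1 << 16)

static void
process_span (int n, fixed_16_16 vx, int use_wrap_buffer)
{
    printf ("%d pixels from vx=%x (wrap buffer: %d)\n",
            n, (unsigned) vx, use_wrap_buffer);
}

static void
split_row (int width, fixed_16_16 vx, fixed_16_16 unit_x, int src_width)
{
    fixed_16_16 src_width_fixed = src_width * FIXED_1;

    while (width > 0)
    {
        int n;

        while (vx >= src_width_fixed)     /* repeat (NORMAL): wrap into range */
            vx -= src_width_fixed;

        if ((vx >> 16) == src_width - 1)
        {
            /* last source column: the right neighbour wraps to column 0,
             * so these pixels are fed from a 2-entry wrap buffer */
            n = (src_width_fixed - vx - 1) / unit_x + 1;
            if (n > width) n = width;
            process_span (n, vx & (FIXED_1 - 1), 1);
        }
        else
        {
            /* interior pixels read the source line directly */
            n = (src_width_fixed - FIXED_1 - vx - 1) / unit_x + 1;
            if (n > width) n = width;
            process_span (n, vx, 0);
        }
        width -= n;
        vx += n * unit_x;
    }
}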
5740 | static force_inline /* __inline__ __attribute__ ((__always_inline__)) */ void | |||
5741 | scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst, | |||
5742 | const uint32_t * mask, | |||
5743 | const uint32_t * src_top, | |||
5744 | const uint32_t * src_bottom, | |||
5745 | int32_t w, | |||
5746 | int wt, | |||
5747 | int wb, | |||
5748 | pixman_fixed_t vx_, | |||
5749 | pixman_fixed_t unit_x_, | |||
5750 | pixman_fixed_t max_vx, | |||
5751 | pixman_bool_t zero_src) | |||
5752 | { | |||
5753 | intptr_t vx = vx_; | |||
5754 | intptr_t unit_x = unit_x_; | |||
5755 | BILINEAR_DECLARE_VARIABLES
/* expansion (reflowed): */
const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);
const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);
const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1);
const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x,
                                       unit_x, -unit_x, unit_x, -unit_x);
const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4,
                                       unit_x * 4, -unit_x * 4,
                                       unit_x * 4, -unit_x * 4,
                                       unit_x * 4, -unit_x * 4);
const __m128i xmm_zero = _mm_setzero_si128 ();
__m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1),
                               vx, -(vx + 1), vx, -(vx + 1)); | |||
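/* Editor's note on the pair layout above: each 16-bit lane pair holds
 * (vx, -(vx + 1)); since -(x + 1) == ~x, shifting every lane right by
 * 16 - 7 = 9 leaves the top 7 fraction bits as frac and 127 - frac, and
 * adding xmm_addc bumps the complement lane so the pair becomes the
 * horizontal weights frac and 128 - frac consumed by _mm_madd_epi16 below.
 * xmm_ux1 and xmm_ux4 advance x by one and four pixel steps respectively. */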
5756 | uint32_t pix1, pix2; | |||
5757 | ||||
5758 | while (w && ((uintptr_t)dst & 15)) | |||
5759 | { | |||
5760 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
/* expansion (reflowed for readability; this is the canonical copy, and the
 * later BILINEAR_INTERPOLATE_* lines expand to the same sequence): */
do {
    __m128i xmm_pix;
    do {
        __m128i xmm_wh, xmm_a, xmm_b;
        /* load the two adjacent top and two bottom source pixels at floor(vx) */
        __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
        __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);
        (void)xmm_ux4;                      /* the 4-pixel stride is unused here */
        vx += unit_x;
        /* vertical blend: widen each 8-bit channel to 16 bits and scale by
         * the 7-bit row weights wt/wb */
        xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);
        xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);
        xmm_a = _mm_add_epi16 (xmm_a, xmm_b);
        /* horizontal weights frac / 128 - frac from the fixed-point x,
         * then advance x by one pixel step */
        xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7));
        xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);
        /* pair the left/right channels and combine with madd; the final
         * shift by 14 drops the two 7-bit weight scales */
        xmm_b = _mm_unpacklo_epi64 (xmm_b, xmm_a);
        xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh);
        xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2);
    } while (0);
    /* pack the 32-bit channels back down to one 8888 pixel */
    xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix);
    xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix);
    pix1 = _mm_cvtsi128_si32 (xmm_pix);
} while (0); | |||
5761 | *dst++ = pix1 | 0xFF000000; | |||
5762 | w--; | |||
5763 | } | |||
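/* Editor's note: the loop above retires single pixels until dst reaches
 * 16-byte alignment, so the four-pixel loop below can use the aligned
 * _mm_store_si128. The `| 0xFF000000` is the whole x888 -> 8888 conversion:
 * the source's undefined alpha byte is forced to opaque. */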
5764 | ||||
5765 | while ((w -= 4) >= 0) { | |||
5766 | __m128i xmm_src; | |||
5767 | BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
/* expansion condensed: repeats the one-pixel interpolation shown at line 5760
 * four times into xmm_pix1..xmm_pix4 (each step advancing vx and xmm_x), with
 * the per-pixel results kept as 32-bit lanes instead of being packed and
 * extracted individually, then combines them into one register:
 *     xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2);
 *     xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4);
 *     xmm_src  = _mm_packus_epi16 (xmm_pix1, xmm_pix3);
 */ | |||
5768 | _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000)); | |||
5769 | dst += 4; | |||
5770 | } | |||
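/* Editor's note: `while ((w -= 4) >= 0)` leaves w negative (between -4 and
 * -1) on exit, but only the low two bits matter to the `w & 2` and `w & 1`
 * tail tests below, and those still equal the original width modulo 4, so
 * the tail correctly handles the 0-3 leftover pixels. */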
5771 | ||||
5772 | if (w & 2) | |||
5773 | { | |||
5774 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);   /* expands as at line 5760 */ | |||
5775 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);   /* same expansion, result in pix2 */ | |||
5776 | *dst++ = pix1 | 0xFF000000; | |||
5777 | *dst++ = pix2 | 0xFF000000; | |||
5778 | } | |||
5779 | ||||
5780 | if (w & 1) | |||
5781 | { | |||
5782 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);   /* expands as at line 5760 */ | |||
5783 | *dst = pix1 | 0xFF000000; | |||
5784 | } | |||
5785 | } | |||
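/* Editor's note: a scalar reference for the arithmetic that
 * BILINEAR_INTERPOLATE_ONE_PIXEL performs above, assuming pixman's 7-bit
 * bilinear weights (wt + wb == 128 vertically, wl + wr == 128 horizontally).
 * The helper name is illustrative, not pixman API. The SSE2 code blends
 * vertically first and horizontally second; since bilinear filtering is
 * separable and neither order truncates intermediates, this form is
 * arithmetically identical. */
static uint32_t
ref_bilinear_8888 (uint32_t tl, uint32_t tr, uint32_t bl, uint32_t br,
                   int wt, int wb, int wl, int wr)
{
    uint32_t result = 0;
    int c;

    for (c = 0; c < 32; c += 8)           /* each 8-bit channel independently */
    {
        unsigned top = ((tl >> c) & 0xff) * wl + ((tr >> c) & 0xff) * wr;
        unsigned bot = ((bl >> c) & 0xff) * wl + ((br >> c) & 0xff) * wr;
        /* two 7-bit weight scales accumulate to 14 fraction bits */
        result |= (((top * wt + bot * wb) >> 14) & 0xffu) << c;
    }
    return result;
}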
5786 | ||||
5787 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC,static void fast_composite_scaled_bilinear_sse2_x888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t ) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( uint32_t) (src_image->bits.width) << 16))); max_x = ( (int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1 ; if 
(src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride ; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t 
buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
5788 | scaled_bilinear_scanline_sse2_x888_8888_SRC,static void fast_composite_scaled_bilinear_sse2_x888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t ) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( uint32_t) (src_image->bits.width) << 16))); max_x = ( (int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1 ; if 
(src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride ; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t 
buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
5789 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_x888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t ) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( uint32_t) (src_image->bits.width) << 16))); max_x = ( (int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1 ; if (src_image->bits.width < 64) { 
src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride ; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; 
uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
5790 | COVER, FLAG_NONE) | |||
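The row above closes one of three FAST_BILINEAR_MAINLOOP_COMMON invocations (COVER here, PAD and NORMAL below). The analyzer's macro-expansion view, trimmed from these rows, shows that each invocation stamps out a complete compositing entry point named fast_composite_scaled_bilinear_sse2_x888_8888_<repeat>_SRC, with the repeat mode substituted as a compile-time constant: comparisons such as PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE fold away, and in the COVER expansion the repeat mode is a sentinel (-1) that matches no PIXMAN_REPEAT_* value, so only the final branch that calls the scanline worker directly survives. The sketch below condenses the per-scanline skeleton common to all three expansions: advance a 16.16 fixed-point y coordinate, pick the two bracketing source rows, derive 7-bit bilinear weights that sum to 128, and delegate each scanline to the SSE2 worker. It is a minimal sketch with hypothetical names (suffixed _sketch), not the pixman macro itself, and it omits the mask and all repeat/edge handling.

    /* Minimal sketch of the bilinear main-loop skeleton; names suffixed
     * "_sketch" are hypothetical, not pixman's. */
    #include <stdint.h>

    #define BILINEAR_BITS 7                 /* weights sum to 1 << 7 = 128 */

    typedef int32_t pixman_fixed_t;         /* 16.16 fixed point */

    typedef void (*scanline_fn_sketch) (uint32_t *dst,
                                        const uint32_t *top,
                                        const uint32_t *bottom,
                                        int w, int wt, int wb,
                                        pixman_fixed_t vx,
                                        pixman_fixed_t unit_x);

    /* Top BILINEAR_BITS bits of the fractional part of a 16.16 value. */
    static int
    fixed_to_bilinear_weight_sketch (pixman_fixed_t f)
    {
        return (f >> (16 - BILINEAR_BITS)) & ((1 << BILINEAR_BITS) - 1);
    }

    static void
    bilinear_mainloop_sketch (uint32_t *dst_line, int dst_stride,
                              const uint32_t *src_first_line, int src_stride,
                              int width, int height,
                              pixman_fixed_t vx, pixman_fixed_t vy,
                              pixman_fixed_t unit_x, pixman_fixed_t unit_y,
                              scanline_fn_sketch scanline)
    {
        while (--height >= 0)
        {
            int y1 = (int) (vy >> 16);      /* top bracketing source row */
            int weight2 = fixed_to_bilinear_weight_sketch (vy);
            int y2, weight1;

            if (weight2)
            {
                y2 = y1 + 1;                /* bottom row gets weight2 */
                weight1 = (1 << BILINEAR_BITS) - weight2;
            }
            else
            {
                /* Exactly on a row: split the weight so it still sums to
                 * 128 and the worker needs no special case. */
                y2 = y1;
                weight1 = weight2 = (1 << BILINEAR_BITS) / 2;
            }
            vy += unit_y;

            scanline (dst_line,
                      src_first_line + src_stride * y1,
                      src_first_line + src_stride * y2,
                      width, weight1, weight2, vx, unit_x);
            dst_line += dst_stride;
        }
    }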
5791 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC, | |||
5792 | scaled_bilinear_scanline_sse2_x888_8888_SRC, | |||
5793 | uint32_t, uint32_t, uint32_t, | |||
5794 | PAD, FLAG_NONE) | |||
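The PAD expansion trimmed from rows 5791-5794 differs from the skeleton sketched above only in its edge handling: when a scanline starts left of the source (left_pad > 0) or ends right of it (right_pad > 0), the edge pixel of each bracketing row is replicated into a two-entry buffer and the worker runs over the pad span with vx = unit_x = 0. The NONE variant instead uses zero-filled buffers plus one-pixel transition zones (left_tz/right_tz) that blend between zero and the edge, and the NORMAL variant beginning on the next row tiles sources narrower than 64 pixels into stack buffers (extended_src_line0/1, 64*2 entries each) so the wrap point is handled once per chunk rather than once per pixel. Below is a minimal sketch of the PAD left-edge case, again under hypothetical _sketch names rather than pixman's.

    /* Hedged sketch of PAD-repeat left-edge handling as shown in the
     * trimmed expansion: replicate the leftmost pixel of the two
     * bracketing rows, then interpolate only vertically across the pad. */
    #include <stdint.h>

    typedef int32_t pixman_fixed_t;   /* 16.16 fixed point */

    typedef void (*scanline_fn_sketch) (uint32_t *dst,
                                        const uint32_t *top,
                                        const uint32_t *bottom,
                                        int w, int wt, int wb,
                                        pixman_fixed_t vx,
                                        pixman_fixed_t unit_x);

    static void
    pad_left_edge_sketch (uint32_t *dst, int left_pad,
                          const uint32_t *src1, const uint32_t *src2,
                          int weight1, int weight2,
                          scanline_fn_sketch scanline)
    {
        uint32_t buf1[2], buf2[2];

        buf1[0] = buf1[1] = src1[0];   /* replicate leftmost top pixel    */
        buf2[0] = buf2[1] = src2[0];   /* replicate leftmost bottom pixel */

        /* vx = 0 and unit_x = 0: every pad pixel samples the same
         * replicated column, so the filter degenerates to the vertical
         * weights alone and never reads outside the source row. */
        scanline (dst, buf1, buf2, left_pad, weight1, weight2, 0, 0);
    }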
5795 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC, | |||
5796 | scaled_bilinear_scanline_sse2_x888_8888_SRC, | |||
5797 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_x888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t ) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16))); max_x = ((int) ((vx + 
(width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride ; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { 
int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
5798 | NORMAL, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_x888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t ) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * 
(int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride ; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; 
int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
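The three argument lines above close a FAST_BILINEAR_MAINLOOP_COMMON instantiation, which expands into a complete compositor, fast_composite_scaled_bilinear_sse2_x888_8888_normal_SRC: for each scanline it derives two source rows and a pair of 7-bit vertical weights from the 16.16 fixed-point y coordinate vy, then hands the row pair to the scanline routine named in its first argument. A minimal scalar sketch of that weight step, following the expanded code (the helper name vertical_weights_sketch is made up for illustration):

#include <stdint.h>

/* Sketch of the per-scanline setup performed inside the expanded main loop;
 * a plain-C reference, not pixman's exact code. vy is a 16.16 fixed-point
 * source y coordinate; the two weights always sum to 1 << 7 == 128. */
static void
vertical_weights_sketch (int32_t vy, int *y1, int *y2, int *wt, int *wb)
{
    *y1 = vy >> 16;                 /* integer source row (pixman_fixed_to_int) */
    *wb = (vy >> (16 - 7)) & 0x7f;  /* pixman_fixed_to_bilinear_weight (vy) */

    if (*wb)
    {
        *y2 = *y1 + 1;              /* blend with the row below */
        *wt = (1 << 7) - *wb;
    }
    else
    {
        *y2 = *y1;                  /* exactly on a row: split the weight */
        *wt = *wb = (1 << 7) / 2;
    }
}

Because the two weights always sum to 128, the interpolation in the scanline function below can shift its accumulated products right by 14 bits (two 7-bit weights) to return to 8-bit channels.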
5799 | ||||
5800 | static force_inline void | |||
5801 | scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, | |||
5802 | const uint32_t * mask, | |||
5803 | const uint32_t * src_top, | |||
5804 | const uint32_t * src_bottom, | |||
5805 | int32_t w, | |||
5806 | int wt, | |||
5807 | int wb, | |||
5808 | pixman_fixed_t vx_, | |||
5809 | pixman_fixed_t unit_x_, | |||
5810 | pixman_fixed_t max_vx, | |||
5811 | pixman_bool_t zero_src) | |||
5812 | { | |||
5813 | intptr_t vx = vx_; | |||
5814 | intptr_t unit_x = unit_x_; | |||
5815 | BILINEAR_DECLARE_VARIABLES; | |||
5816 | uint32_t pix1, pix2; | |||
5817 | ||||
5818 | while (w && ((uintptr_t)dst & 15)) | |||
5819 | { | |||
5820 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); | |||
5821 | ||||
5822 | if (pix1) | |||
5823 | { | |||
5824 | pix2 = *dst; | |||
5825 | *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); | |||
5826 | } | |||
5827 | ||||
5828 | w--; | |||
5829 | dst++; | |||
5830 | } | |||
5831 | ||||
5832 | while (w >= 4) | |||
5833 | { | |||
5834 | __m128i xmm_src; | |||
5835 | __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo; | |||
5836 | __m128i xmm_alpha_hi, xmm_alpha_lo; | |||
5837 | ||||
5838 | BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); | |||
5839 | ||||
5840 | if (!is_zero (xmm_src)) | |||
5841 | { | |||
5842 | if (is_opaque (xmm_src)) | |||
5843 | { | |||
5844 | save_128_aligned ((__m128i *)dst, xmm_src); | |||
5845 | } | |||
5846 | else | |||
5847 | { | |||
5848 | __m128i xmm_dst = load_128_aligned ((__m128i *)dst); | |||
5849 | ||||
5850 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
5851 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
5852 | ||||
5853 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); | |||
5854 | over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, | |||
5855 | &xmm_dst_lo, &xmm_dst_hi); | |||
5856 | ||||
5857 | save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
5858 | } | |||
5859 | } | |||
5860 | ||||
5861 | w -= 4; | |||
5862 | dst += 4; | |||
5863 | } | |||
5864 | ||||
5865 | while (w) | |||
5866 | { | |||
5867 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); | |||
5868 | ||||
5869 | if (pix1) | |||
5870 | { | |||
5871 | pix2 = *dst; | |||
5872 | *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); | |||
5873 | } | |||
5874 | ||||
5875 | w--; | |||
5876 | dst++; | |||
5877 | } | |||
5878 | } | |||
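The BILINEAR_INTERPOLATE_ONE_PIXEL and BILINEAR_INTERPOLATE_FOUR_PIXELS expansions in the function above blend the two source rows vertically with xmm_wt/xmm_wb, blend horizontally with weights derived from xmm_x, and shift right by 7 * 2 to renormalize. A scalar reference of the same arithmetic (a sketch; bilinear_one_pixel_ref is an illustrative name, not a pixman function):

#include <stdint.h>

/* Scalar equivalent of one BILINEAR_INTERPOLATE_ONE_PIXEL step above:
 * tl/tr are the two top-row pixels, bl/br the two bottom-row pixels,
 * wt/wb the vertical weights (wt + wb == 128) and wx the horizontal
 * weight in [0, 128) taken from the fixed-point x coordinate. */
static uint32_t
bilinear_one_pixel_ref (uint32_t tl, uint32_t tr,
                        uint32_t bl, uint32_t br,
                        int wt, int wb, int wx)
{
    uint32_t out = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        /* vertical blend of each column, as the xmm_wt/xmm_wb multiplies do */
        int l = ((tl >> shift) & 0xff) * wt + ((bl >> shift) & 0xff) * wb;
        int r = ((tr >> shift) & 0xff) * wt + ((br >> shift) & 0xff) * wb;

        /* horizontal blend, then drop the 2 * 7 accumulated weight bits */
        out |= (uint32_t)(((l * (128 - wx) + r * wx) >> 14) & 0xff) << shift;
    }
    return out;
}

The aligned four-pixel loop adds two fast paths on top of this: is_zero skips fully transparent sources, and is_opaque stores the source directly, so the full over_2x128 blend only runs for partially transparent pixels.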
5879 | ||||
5880 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER, | |||
5881 | scaled_bilinear_scanline_sse2_8888_8888_OVER, | |||
5882 | uint32_t, uint32_t, uint32_t, | |||
5883 | COVER, FLAG_NONE) | |||
extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
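/*
 * Editorial note, not part of the generated source above: the expanded
 * main loops derive the two source rows and their blend weights from a
 * 16.16 fixed-point vertical coordinate.  A minimal standalone sketch
 * of that step, assuming pixman's 7-bit bilinear weights (the (1 << 7)
 * constants in the expansion); names here are illustrative.
 */
#include <stdint.h>

typedef int32_t sketch_fixed_t;             /* 16.16 fixed point */

static void
sketch_rows_and_weights (sketch_fixed_t vy,
                         int *y1, int *y2, int *weight1, int *weight2)
{
    *y1      = (int) (vy >> 16);            /* integer part: top row      */
    *weight2 = (vy >> (16 - 7)) & 0x7f;     /* fraction -> bottom weight  */

    if (*weight2)
    {
        *y2      = *y1 + 1;
        *weight1 = (1 << 7) - *weight2;     /* the two weights sum to 128 */
    }
    else
    {
        /* Exactly on a row: split the weight in half so the scanline
         * code still adds two contributions, as the expansion does. */
        *y2 = *y1;
        *weight1 = *weight2 = (1 << 7) / 2;
    }
}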
5884 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t ) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16))); max_x = ((int) 
((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride ; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { 
int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
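/*
 * Editorial note, not part of the generated source: the REPEAT_NORMAL
 * branch in the expansion above tiles a very narrow source (width < 64)
 * into a 64*2-entry stack buffer so the scanline worker never has to
 * wrap mid-run.  A minimal sketch of that widening step; it relies on
 * src_width having been built up as a whole multiple of the image
 * width, exactly as the expansion computes it.
 */
#include <stdint.h>

static void
sketch_extend_narrow_source (const uint32_t *line,
                             int             image_width,
                             int             src_width,
                             uint32_t       *extended)
{
    int i = 0, j;

    /* Repeat the image row end to end until src_width pixels exist. */
    while (i < src_width)
        for (j = 0; j < image_width && i < src_width; j++, i++)
            extended[i] = line[j];
}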
5885 | scaled_bilinear_scanline_sse2_8888_8888_OVER, | |||
5886 | uint32_t, uint32_t, uint32_t, | |||
5887 | PAD, FLAG_NONE) | |||
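/*
 * Editorial note, not part of the generated source: each _pad_OVER
 * scanline in the expansion above is rendered as three runs: a left pad
 * that replicates the first source pixel, the in-bounds middle, and a
 * right pad that replicates the last pixel.  A condensed sketch of that
 * split; the scanline callback is a hypothetical, simplified stand-in
 * for scaled_bilinear_scanline_sse2_8888_8888_OVER.
 */
#include <stdint.h>

typedef int32_t sketch_fixed_t;  /* 16.16 fixed point */

typedef void (*sketch_scanline_t) (uint32_t *dst,
                                   const uint32_t *src_top,
                                   const uint32_t *src_bot,
                                   int n, int w1, int w2,
                                   sketch_fixed_t vx,
                                   sketch_fixed_t unit_x);

static void
sketch_pad_scanline (sketch_scanline_t scanline,
                     uint32_t *dst,
                     const uint32_t *src1, const uint32_t *src2,
                     int src_width,
                     int left_pad, int width, int right_pad,
                     sketch_fixed_t vx, sketch_fixed_t unit_x,
                     int w1, int w2)
{
    uint32_t buf1[2], buf2[2];

    if (left_pad > 0)
    {
        /* Left of the image: both bilinear taps see the first pixel,
         * so the horizontal position can stay fixed at 0. */
        buf1[0] = buf1[1] = src1[0];
        buf2[0] = buf2[1] = src2[0];
        scanline (dst, buf1, buf2, left_pad, w1, w2, 0, 0);
        dst += left_pad;
    }
    if (width > 0)
    {
        /* In-bounds run walks the real rows with the stepped vx. */
        scanline (dst, src1, src2, width, w1, w2, vx, unit_x);
        dst += width;
    }
    if (right_pad > 0)
    {
        /* Right of the image: replicate the last pixel. */
        buf1[0] = buf1[1] = src1[src_width - 1];
        buf2[0] = buf2[1] = src2[src_width - 1];
        scanline (dst, buf1, buf2, right_pad, w1, w2, 0, 0);
    }
}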
5888 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t ) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16))); max_x = 
((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride ; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == 
PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
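/*
 * Editorial note, not part of the generated source: for REPEAT_NONE the
 * body above clamps out-of-range rows instead of fetching past the
 * image: a row that falls outside keeps a valid index for the pointer
 * arithmetic but gets weight 0, so it contributes nothing.  A minimal
 * sketch of that clamp:
 */
static void
sketch_none_clamp_row (int *y, int *weight, int height)
{
    if (*y < 0)
    {
        *weight = 0;          /* fully outside: no contribution        */
        *y      = 0;          /* keep src_first_line + stride*y valid  */
    }
    else if (*y >= height)
    {
        *weight = 0;
        *y      = height - 1;
    }
}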
5889 | scaled_bilinear_scanline_sse2_8888_8888_OVER, | |||
5890 | uint32_t, uint32_t, uint32_t, | |||
5891 | NONE, FLAG_NONE) | |||
5892 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t ) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 
16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride ; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if 
(PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
5893 | scaled_bilinear_scanline_sse2_8888_8888_OVER, | |||
5894 | uint32_t, uint32_t, uint32_t, | |||
5895 | NORMAL, FLAG_NONE) | |||
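/* Illustrative sketch (not part of pixman): the FAST_BILINEAR_MAINLOOP_COMMON
 * instantiations above expand into complete compositing entry points.  The
 * function below is a hand-condensed outline of the per-scanline part of that
 * expansion, for readability only -- pad / transition-zone edge handling, the
 * NORMAL repeat wrap-around and the mask plumbing are deliberately omitted,
 * and the helper names are taken from the expansion itself. */
static void
bilinear_mainloop_sketch (uint32_t       *dst_line,
                          int             dst_stride,
                          const uint32_t *src_first_line,
                          int             src_stride,
                          int32_t         width,
                          int32_t         height,
                          pixman_fixed_t  vx0,     /* 16.16 start x   */
                          pixman_fixed_t  vy,      /* 16.16 start y   */
                          pixman_fixed_t  unit_x,  /* 16.16 step in x */
                          pixman_fixed_t  unit_y)  /* 16.16 step in y */
{
    while (--height >= 0)
    {
        /* Split the vertical position into a row index and a 7-bit
         * bilinear weight, exactly as the expansion does. */
        int y1      = (int) (vy >> 16);
        int weight2 = pixman_fixed_to_bilinear_weight (vy);
        int weight1, y2;

        if (weight2)
        {
            y2 = y1 + 1;
            weight1 = (1 << 7) - weight2;
        }
        else
        {
            /* Exactly on a source row: use the same row twice with equal
             * weights so the scanline code still sees two rows. */
            y2 = y1;
            weight1 = weight2 = (1 << 7) / 2;
        }
        vy += unit_y;

        /* Assumes y1/y2 stay inside the source image; the real expansion
         * clamps or zero-weights rows at the top and bottom edges. */
        scaled_bilinear_scanline_sse2_8888_8888_OVER (
            dst_line, NULL /* mask unused for FLAG_NONE */,
            src_first_line + src_stride * y1,
            src_first_line + src_stride * y2,
            width, weight1, weight2, vx0, unit_x, 0, 0);

        dst_line += dst_stride;
    }
}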
5896 | ||||
5897 | static force_inline void | |||
5898 | scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, | |||
5899 | const uint8_t * mask, | |||
5900 | const uint32_t * src_top, | |||
5901 | const uint32_t * src_bottom, | |||
5902 | int32_t w, | |||
5903 | int wt, | |||
5904 | int wb, | |||
5905 | pixman_fixed_t vx_, | |||
5906 | pixman_fixed_t unit_x_, | |||
5907 | pixman_fixed_t max_vx, | |||
5908 | pixman_bool_t zero_src) | |||
5909 | { | |||
5910 | intptr_t vx = vx_; | |||
5911 | intptr_t unit_x = unit_x_; | |||
5912 | BILINEAR_DECLARE_VARIABLES; | |||
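/*
 * BILINEAR_DECLARE_VARIABLES (pixman-inlines.h) sets up the per-scanline
 * SSE2 working state used by the BILINEAR_* macros below; in essence it
 * declares:
 *
 *   xmm_wt, xmm_wb   - vertical weights wt/wb broadcast to eight int16 lanes
 *   xmm_addc         - the (0,1,0,1,...) constant used when deriving the
 *                      horizontal weights from the fractional x position
 *   xmm_ux1, xmm_ux4 - fixed-point x steps for one and for four pixels
 *   xmm_zero         - all-zero register for byte unpacking
 *   xmm_x            - the running (vx, -(vx + 1), ...) coordinate vector
 */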
5913 | uint32_t pix1, pix2; | |||
5914 | ||||
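/* Head loop: composite single pixels until dst is 16-byte aligned, so the
 * four-pixel loop below can use aligned 128-bit loads and stores. */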
5915 | while (w && ((uintptr_t)dst & 15)) | |||
5916 | { | |||
5917 | uint32_t sa; | |||
5918 | uint8_t m = *mask++; | |||
5919 | ||||
5920 | if (m) | |||
5921 | { | |||
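/*
 * A scalar sketch of what BILINEAR_INTERPOLATE_ONE_PIXEL computes per
 * channel (tl/tr/bl/br stand for the four neighbouring texels at vx >> 16;
 * wl/wr are the 7-bit horizontal weights derived from xmm_x, wl + wr = 128):
 *
 *     pix1 = ((tl*wt + bl*wb) * wl + (tr*wt + br*wb) * wr) >> (7 * 2);
 *
 * The macro then advances vx by unit_x and xmm_x by xmm_ux1.
 */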
5922 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); | |||
5923 | sa = pix1 >> 24; | |||
5924 | ||||
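/* Fully opaque source pixel under a fully opaque mask byte: OVER reduces
 * to a plain store.  Otherwise do the full (mask IN src) OVER dst. */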
5925 | if (sa == 0xff && m == 0xff) | |||
5926 | { | |||
5927 | *dst = pix1; | |||
5928 | } | |||
5929 | else | |||
5930 | { | |||
5931 | __m128i ms, md, ma, msa; | |||
5932 | ||||
5933 | pix2 = *dst; | |||
5934 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | |||
5935 | ms = unpack_32_1x128 (pix1); | |||
5936 | md = unpack_32_1x128 (pix2); | |||
5937 | ||||
5938 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | |||
5939 | ||||
5940 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | |||
5941 | } | |||
5942 | } | |||
5943 | else | |||
5944 | { | |||
5945 | BILINEAR_SKIP_ONE_PIXEL (); | |||
5946 | } | |||
5947 | ||||
5948 | w--; | |||
5949 | dst++; | |||
5950 | } | |||
5951 | ||||
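/* Main loop: interpolate and composite four pixels per iteration using
 * aligned 128-bit accesses on dst. */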
5952 | while (w >= 4) | |||
5953 | { | |||
5954 | uint32_t m; | |||
5955 | ||||
5956 | __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; | |||
5957 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
5958 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | |||
5959 | ||||
5960 | memcpy(&m, mask, sizeof(uint32_t)); | |||
5961 | ||||
5962 | if (m) | |||
5963 | { | |||
5964 | BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); | |||
5965 | ||||
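/* All four mask bytes and all four source alphas are 0xff: store the
 * interpolated pixels as-is.  Otherwise expand the mask bytes to
 * per-channel alpha and run in_over on the low and high halves. */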
5966 | if (m == 0xffffffff && is_opaque (xmm_src)) | |||
5967 | { | |||
5968 | save_128_aligned ((__m128i *)dst, xmm_src); | |||
5969 | } | |||
5970 | else | |||
5971 | { | |||
5972 | xmm_dst = load_128_aligned ((__m128i *)dst); | |||
5973 | ||||
5974 | xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); | |||
5975 | ||||
5976 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
5977 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | |||
5978 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
5979 | ||||
5980 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); | |||
5981 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | |||
5982 | ||||
5983 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, | |||
5984 | &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); | |||
5985 | ||||
5986 | save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
5987 | } | |||
5988 | } | |||
5989 | else | |||
5990 | { | |||
5991 | BILINEAR_SKIP_FOUR_PIXELS (); | |||
5992 | } | |||
5993 | ||||
5994 | w -= 4; | |||
5995 | dst += 4; | |||
5996 | mask += 4; | |||
5997 | } | |||
5998 | ||||
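/* Tail loop: up to three remaining pixels, handled one at a time exactly
 * like the head loop. */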
5999 | while (w) | |||
6000 | { | |||
6001 | uint32_t sa; | |||
6002 | uint8_t m = *mask++; | |||
6003 | ||||
6004 | if (m) | |||
6005 | { | |||
6006 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); | |||
6007 | sa = pix1 >> 24; | |||
6008 | ||||
6009 | if (sa == 0xff && m == 0xff) | |||
6010 | { | |||
6011 | *dst = pix1; | |||
6012 | } | |||
6013 | else | |||
6014 | { | |||
6015 | __m128i ms, md, ma, msa; | |||
6016 | ||||
6017 | pix2 = *dst; | |||
6018 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | |||
6019 | ms = unpack_32_1x128 (pix1); | |||
6020 | md = unpack_32_1x128 (pix2); | |||
6021 | ||||
6022 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | |||
6023 | ||||
6024 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | |||
6025 | } | |||
6026 | } | |||
6027 | else | |||
6028 | { | |||
6029 | BILINEAR_SKIP_ONE_PIXEL (); | |||
6030 | } | |||
6031 | ||||
6032 | w--; | |||
6033 | dst++; | |||
6034 | } | |||
6035 | } | |||
6036 | ||||
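/*
 * Each FAST_BILINEAR_MAINLOOP_COMMON invocation below expands into a
 * fast_composite_scaled_bilinear_sse2_8888_8_8888_* entry point that reads
 * the pixman_composite_info_t, walks the destination scanlines, derives the
 * vertical weights from vy, handles the COVER/PAD/NONE/NORMAL repeat edge
 * cases with small staging buffers, and calls
 * scaled_bilinear_scanline_sse2_8888_8_8888_OVER for each horizontal span.
 */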
6037 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER, | |||
6038 | scaled_bilinear_scanline_sse2_8888_8_8888_OVER, | |||
6039 | uint32_t, uint8_t, uint32_t, | |||
6040 | COVER, FLAG_HAVE_NON_SOLID_MASK) | |||
6041 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16))); max_x 
= ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1 [2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image ->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1 [0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } 
else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
6042 | scaled_bilinear_scanline_sse2_8888_8_8888_OVER, | |||
6043 | uint32_t, uint8_t, uint32_t, | |||
6044 | PAD, FLAG_HAVE_NON_SOLID_MASK) | |||
6045 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER, | |||
6046 | scaled_bilinear_scanline_sse2_8888_8_8888_OVER, | |||
6047 | uint32_t, uint8_t, uint32_t, | |||
6048 | NONE, FLAG_HAVE_NON_SOLID_MASK) | |||
6049 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) 
(src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1 [2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image ->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1 [0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, 
right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
6050 | scaled_bilinear_scanline_sse2_8888_8_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16))); 
max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1 [2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image ->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1 [0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 
0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
6051 | uint32_t, uint8_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16))); max_x = ((int) ((vx 
+ (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1 [2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image ->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1 [0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if 
(PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
6052 | NORMAL, FLAG_HAVE_NON_SOLID_MASK)static void fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16))); max_x = ((int) 
((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1 [2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image ->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1 [0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } 
else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
6053 | ||||
6054 | static force_inline void | |||
6055 | scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, | |||
6056 | const uint32_t * mask, | |||
6057 | const uint32_t * src_top, | |||
6058 | const uint32_t * src_bottom, | |||
6059 | int32_t w, | |||
6060 | int wt, | |||
6061 | int wb, | |||
6062 | pixman_fixed_t vx_, | |||
6063 | pixman_fixed_t unit_x_, | |||
6064 | pixman_fixed_t max_vx, | |||
6065 | pixman_bool_t zero_src) | |||
6066 | { | |||
6067 | intptr_t vx = vx_; | |||
6068 | intptr_t unit_x = unit_x_; | |||
6069 | BILINEAR_DECLARE_VARIABLES; | |||
6070 | uint32_t pix1; | |||
6071 | __m128i xmm_mask; | |||
6072 | ||||
6073 | if (zero_src || (*mask >> 24) == 0) | |||
6074 | return; | |||
6075 | ||||
6076 | xmm_mask = create_mask_16_128 (*mask >> 24); | |||
6077 | ||||
6078 | while (w && ((uintptr_t)dst & 15)) | |||
6079 | { | |||
6080 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); | |||
6081 | if (pix1) | |||
6082 | { | |||
6083 | uint32_t d = *dst; | |||
6084 | ||||
6085 | __m128i ms = unpack_32_1x128 (pix1); | |||
6086 | __m128i alpha = expand_alpha_1x128 (ms); | |||
6087 | __m128i dest = xmm_mask; | |||
6088 | __m128i alpha_dst = unpack_32_1x128 (d); | |||
6089 | ||||
6090 | *dst = pack_1x128_32 | |||
6091 | (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); | |||
6092 | } | |||
6093 | ||||
6094 | dst++; | |||
6095 | w--; | |||
6096 | } | |||
6097 | ||||
6098 | while (w >= 4) | |||
6099 | { | |||
6100 | __m128i xmm_src; | |||
6101 | BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); | |||
6102 | ||||
6103 | if (!is_zero (xmm_src)) | |||
6104 | { | |||
6105 | __m128i xmm_src_lo, xmm_src_hi; | |||
6106 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | |||
6107 | __m128i xmm_alpha_lo, xmm_alpha_hi; | |||
6108 | ||||
6109 | xmm_dst = load_128_aligned ((__m128i*)dst); | |||
6110 | ||||
6111 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | |||
6112 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | |||
6113 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | |||
6114 | &xmm_alpha_lo, &xmm_alpha_hi); | |||
6115 | ||||
6116 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | |||
6117 | &xmm_alpha_lo, &xmm_alpha_hi, | |||
6118 | &xmm_mask, &xmm_mask, | |||
6119 | &xmm_dst_lo, &xmm_dst_hi); | |||
6120 | ||||
6121 | save_128_aligned | |||
6122 | ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | |||
6123 | } | |||
6124 | ||||
6125 | dst += 4; | |||
6126 | w -= 4; | |||
6127 | } | |||
6128 | ||||
6129 | while (w) | |||
6130 | { | |||
6131 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); | |||
6132 | if (pix1) | |||
6133 | { | |||
6134 | uint32_t d = *dst; | |||
6135 | ||||
6136 | __m128i ms = unpack_32_1x128 (pix1); | |||
6137 | __m128i alpha = expand_alpha_1x128 (ms); | |||
6138 | __m128i dest = xmm_mask; | |||
6139 | __m128i alpha_dst = unpack_32_1x128 (d); | |||
6140 | ||||
6141 | *dst = pack_1x128_32 | |||
6142 | (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); | |||
6143 | } | |||
6144 | ||||
6145 | dst++; | |||
6146 | w--; | |||
6147 | } | |||
6148 | } | |||
6149 | ||||
6150 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, | |||
6151 | scaled_bilinear_scanline_sse2_8888_n_8888_OVER, | |||
6152 | uint32_t, uint32_t, uint32_t, | |||
6153 | COVER, FLAG_HAVE_SOLID_MASK) | |||
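The expansion shown above is pixman's bilinear main loop specialized for a solid mask with COVER clipping: for each destination row it converts the 16.16 fixed-point source y coordinate into two source scanlines and a pair of weights that always sum to 1 << 7 = 128. Below is a minimal, self-contained sketch of that weight split; the fixed_to_bilinear_weight helper is an assumption that mirrors what pixman_fixed_to_bilinear_weight is expected to compute (the top 7 fractional bits), not the library routine itself.

/* Illustrative sketch only: how the generated main loop splits a 16.16
 * fixed-point source coordinate into two scanline indices and a pair of
 * bilinear weights.  The helper below is an assumed stand-in for
 * pixman_fixed_to_bilinear_weight(). */
#include <stdint.h>
#include <stdio.h>

typedef int32_t pixman_fixed_t;          /* 16.16 fixed point */
#define BILINEAR_BITS 7                  /* weights sum to 1 << 7 = 128 */

static int fixed_to_bilinear_weight (pixman_fixed_t x)
{
    /* top 7 bits of the fractional part */
    return (x >> (16 - BILINEAR_BITS)) & ((1 << BILINEAR_BITS) - 1);
}

int main (void)
{
    pixman_fixed_t vy = (5 << 16) + 0x4000;   /* y = 5.25 */
    int y1 = (int) (vy >> 16);                /* top scanline: 5 */
    int weight2 = fixed_to_bilinear_weight (vy);
    int y2, weight1;

    if (weight2)                              /* between two scanlines */
    {
        y2 = y1 + 1;
        weight1 = (1 << BILINEAR_BITS) - weight2;
    }
    else                                      /* exactly on a scanline */
    {
        y2 = y1;
        weight1 = weight2 = (1 << BILINEAR_BITS) / 2;
    }

    /* weight1 + weight2 == 128 in both branches */
    printf ("y1=%d y2=%d w1=%d w2=%d\n", y1, y2, weight1, weight2);
    return 0;
}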
6154 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16))); 
max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1 [2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image ->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1 [0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 
1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
6155 | scaled_bilinear_scanline_sse2_8888_n_8888_OVER, | |||
6156 | uint32_t, uint32_t, uint32_t, | |||
6157 | PAD, FLAG_HAVE_SOLID_MASK) | |||
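The PAD-repeat instantiation closed above generates fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER. Its per-scanline setup converts the 16.16 fixed-point vertical coordinate vy into a row pair (y1, y2) plus two 7-bit bilinear weights that sum to 1 << 7, exactly as visible in the report's expansion. Below is a minimal runnable sketch of just that step; fixed_to_bilinear_weight() is a stand-in written for illustration and only assumed to match pixman's pixman_fixed_to_bilinear_weight().

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;   /* 16.16 fixed point */
    #define BILINEAR_BITS 7           /* weights sum to 1 << 7, per the expansion */

    /* Assumed equivalent of pixman_fixed_to_bilinear_weight():
     * keep the top BILINEAR_BITS bits of the fractional part. */
    static int fixed_to_bilinear_weight (pixman_fixed_t f)
    {
        return (f >> (16 - BILINEAR_BITS)) & ((1 << BILINEAR_BITS) - 1);
    }

    int main (void)
    {
        pixman_fixed_t vy = (5 << 16) + 0x4000;   /* y = 5.25 */
        int y1 = vy >> 16;
        int weight2 = fixed_to_bilinear_weight (vy);
        int y2, weight1;

        if (weight2)
        {
            y2 = y1 + 1;
            weight1 = (1 << BILINEAR_BITS) - weight2;
        }
        else
        {
            /* Exactly on a source row: the generated code still blends
             * the row with itself using two half weights. */
            y2 = y1;
            weight1 = weight2 = (1 << BILINEAR_BITS) / 2;
        }

        printf ("rows %d,%d  weights %d,%d (sum %d)\n",
                y1, y2, weight1, weight2, weight1 + weight2);
        return 0;
    }

For vy = 5.25 this prints rows 5,6 with weights 96,32; the on-row special case keeps the scanline function on its uniform two-row path instead of needing a separate single-row variant.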
6158 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, | |||
6159 | scaled_bilinear_scanline_sse2_8888_n_8888_OVER, | |||
6160 | uint32_t, uint32_t, uint32_t, | |||
6161 | NONE, FLAG_HAVE_SOLID_MASK) | |||
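The NONE instantiation above handles out-of-bounds samples with zero-filled edge buffers, while the NORMAL instantiation that follows instead wraps the fixed-point horizontal coordinate back into [0, src_width_fixed) with repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed), and for sources narrower than 64 pixels first replicates the scanline into extended_src_line0/1 so one wrap per run suffices. A small runnable sketch of just the coordinate wrap; repeat_normal() is a stand-in written here and only assumed to match the semantics of pixman's repeat() helper for this case.

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;   /* 16.16 fixed point */

    /* Assumed equivalent of repeat (PIXMAN_REPEAT_NORMAL, &coord, size):
     * wrap a fixed-point coordinate into [0, size). */
    static void repeat_normal (pixman_fixed_t *coord, pixman_fixed_t size)
    {
        *coord %= size;
        if (*coord < 0)        /* C's % truncates toward zero */
            *coord += size;
    }

    int main (void)
    {
        pixman_fixed_t src_width_fixed = 8 << 16;   /* 8-pixel-wide source */
        pixman_fixed_t vx = -(3 << 16) - 0x8000;    /* x = -3.5 */

        repeat_normal (&vx, src_width_fixed);       /* -3.5 wraps to 4.5 */
        printf ("wrapped vx = %d.%05d\n", vx >> 16,
                (int) (((int64_t) (vx & 0xffff) * 100000) >> 16));
        return 0;
    }

Wrapping once per run of num_pixels, rather than per pixel, is what lets the generated loop hand the scanline function a plain unit_x increment for each chunk.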
6162 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, | |||
6163 | scaled_bilinear_scanline_sse2_8888_n_8888_OVER, | |||
6164 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16))); max_x = 
((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1 [2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image ->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1 [0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); 
} } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
6165 | NORMAL, FLAG_HAVE_SOLID_MASK)static void fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647 ); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y ) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16))); if (!_moz_pixman_transform_point_3d (src_image->common .transform, &v)) return; unit_x = src_image->common.transform ->matrix[0][0]; unit_y = src_image->common.transform-> matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) ( 1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t ) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((uint32_t) (src_image->bits.width) << 16))); max_x = 
((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image->bits.width; need_src_extension = 1; } else { src_width = src_image->bits.width; need_src_extension = 0 ; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width ) << 16)); } while (--height >= 0) { int weight1, weight2 ; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1 [2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image ->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1 [0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); 
} } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t ) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | |||
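The main-loop macro instantiation that closes at line 6165 generates fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER, whose expanded body the analyzer displays inline: for every destination scanline it converts the 16.16 fixed-point vertical coordinate vy into two source line indices and a pair of 7-bit bilinear weights. The stand-alone sketch below reproduces just that weight computation, assuming 7-bit interpolation as implied by the (1 << 7) terms in the expansion; split_bilinear and the driver in main are illustrative names, not pixman API.

    /* Stand-alone sketch (not pixman code): how the generated main loop turns a
     * 16.16 fixed-point vertical coordinate into two source line indices plus
     * 7-bit bilinear weights, mirroring the y1/y2/weight1/weight2 logic shown
     * in the expansion above. */
    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t fixed_16_16;

    static void
    split_bilinear (fixed_16_16 vy, int *y1, int *y2, int *weight1, int *weight2)
    {
        *y1      = vy >> 16;              /* integer part: top source line    */
        *weight2 = (vy >> 9) & 0x7f;      /* top 7 fraction bits, 0..127      */

        if (*weight2)
        {
            *y2      = *y1 + 1;           /* blend top line with the next one */
            *weight1 = (1 << 7) - *weight2;
        }
        else
        {
            *y2 = *y1;                    /* exactly on a line: split evenly  */
            *weight1 = *weight2 = (1 << 7) / 2;
        }
    }

    int
    main (void)
    {
        int y1, y2, w1, w2;

        split_bilinear ((3 << 16) | 0x4000, &y1, &y2, &w1, &w2);  /* y = 3.25 */
        printf ("y1=%d y2=%d weight1=%d weight2=%d\n", y1, y2, w1, w2);
        return 0;
    }

For y = 3.25 this prints y1=3, y2=4, weight1=96, weight2=32; the two weights always sum to 128, so the blend of the two source lines preserves full intensity.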
6166 | ||||
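The sse2_fast_paths[] table declared just below lists, for each supported operator/format/flags combination, the SSE2 composite routine to dispatch to; each PIXMAN_STD_FAST_PATH / PIXMAN_STD_FAST_PATH_CA entry expands (as the analyzer shows inline) to an initializer carrying the operator, the source, mask and destination formats, the required flag bits, and the function pointer. The following self-contained sketch shows how such a table might be consulted; the types and the lookup loop are simplified stand-ins for illustration, not pixman's actual pixman_fast_path_t or dispatch code.

    /* Simplified stand-in for a fast-path table and its lookup (not pixman). */
    #include <stdint.h>
    #include <stddef.h>

    typedef uint32_t op_t, format_t, flags_t;
    typedef void (*composite_func_t) (void *info);

    typedef struct
    {
        op_t             op;           /* e.g. an OVER-style operator          */
        format_t         src_format;   /* or a "solid"/"null" wildcard code    */
        flags_t          src_flags;    /* flag bits the source must provide    */
        format_t         mask_format;
        flags_t          mask_flags;
        format_t         dest_format;
        flags_t          dest_flags;
        composite_func_t func;         /* routine to run when the entry matches */
    } fast_path_t;

    static composite_func_t
    lookup_fast_path (const fast_path_t *paths, size_t n,
                      op_t op,
                      format_t src,  flags_t src_flags,
                      format_t mask, flags_t mask_flags,
                      format_t dest, flags_t dest_flags)
    {
        for (size_t i = 0; i < n; i++)
        {
            const fast_path_t *p = &paths[i];

            /* Formats must match exactly; every flag bit the entry requires
             * must be present on the corresponding image. */
            if (p->op == op                 &&
                p->src_format  == src       &&
                p->mask_format == mask      &&
                p->dest_format == dest      &&
                (src_flags  & p->src_flags)  == p->src_flags  &&
                (mask_flags & p->mask_flags) == p->mask_flags &&
                (dest_flags & p->dest_flags) == p->dest_flags)
                return p->func;   /* first match wins */
        }
        return NULL;              /* caller falls back to a general path */
    }

In this sketch the first matching entry wins, which is also why ordering within such tables matters: more specific entries are listed ahead of more general ones.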
6167 | static const pixman_fast_path_t sse2_fast_paths[] = | |||
6168 | { | |||
6169 | /* PIXMAN_OP_OVER */ | |||
6170 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565), | |||
6171 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565), | |||
6172 | PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888), | |||
6173 | PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888), | |||
6174 | PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565), | |||
6175 | PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, sse2_composite_over_n_0565), | |||
6176 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888), | |||
6177 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888), | |||
6178 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888), | |||
6179 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888), | |||
6180 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565), | |||
6181 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565), | |||
6182 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888), | |||
6183 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888), | |||
6184 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888), | |||
6185 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888), | |||
6186 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888), | |||
6187 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888), | |||
6188 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888), | |||
6189 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888), | |||
6190 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888), | |||
6191 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888), | |||
6192 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888), | |||
6193 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888), | |||
6194 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888), | |||
6195 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888), | |||
6196 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888), | |||
6197 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888), | |||
6198 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888), | |||
6199 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888), | |||
6200 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888), | |||
6201 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888), | |||
6202 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888), | |||
6203 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca), | |||
6204 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca), | |||
6205 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca), | |||
6206 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca), | |||
6207 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca), | |||
6208 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca), | |||
6209 | PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888), | |||
6210 | PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888), | |||
6211 | PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888), | |||
6212 | PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888), | |||
6213 | PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565), | |||
6214 | PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565), | |||
6215 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | |||
6216 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | |||
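    /* Each PIXMAN_STD_FAST_PATH (op, src, mask, dest, func) entry above
     * expands to a pixman_fast_path_t initializer of the form
     *
     *     { PIXMAN_OP_op,
     *       src format,  src flags,
     *       mask format, mask flags,
     *       dest format, dest flags,
     *       func },
     *
     * where the flag words are built from the FAST_PATH_* bits defined in
     * pixman-private.h and describe the conditions a compositing request
     * must satisfy before func may be chosen for it.
     */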

    /* PIXMAN_OP_OVER_REVERSE */
    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888),
    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888),
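    /* `solid', `null', `pixbuf' and `rpixbuf' are internal pseudo-formats
     * (zero-bpp PIXMAN_FORMAT codes that differ only in their type field),
     * not real pixel layouts: `solid' matches a constant-color image and
     * `null' an absent mask.  At lookup time an entry is taken when the
     * operator and the three formats match and each required flag word is
     * a subset of the flags computed for the request, roughly
     *
     *     (info->src_flags & path->src_flags) == path->src_flags
     *
     * and likewise for the mask and destination (see the fast-path lookup
     * in pixman-implementation.c).
     */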

    /* PIXMAN_OP_ADD */
    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8),
    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
    PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888),
    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888),
    PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888),
    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888),
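    /* The _CA entries are registered by PIXMAN_STD_FAST_PATH_CA, whose
     * expansion requires FAST_PATH_COMPONENT_ALPHA on the mask: the mask
     * then supplies a separate alpha per color channel.  The plain macro
     * instead requires any mask to be sampled with a single unified alpha.
     */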

    /* PIXMAN_OP_SRC */
    PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
    PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888),
    PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888),
    PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area),
    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area),
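    /* Several distinct cases collapse onto sse2_composite_copy_area: an
     * unmasked SRC between same-layout formats is a straight copy, and the
     * OVER entries above that use it behave identically because an
     * x8-format source carries no alpha channel and is treated as fully
     * opaque, making OVER degenerate to SRC.
     */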

    /* PIXMAN_OP_IN */
    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8),
    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
    PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
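    /* The remaining entries handle transformed (scaled) compositing.
     * Unlike PIXMAN_STD_FAST_PATH, whose expansion demands an identity
     * transform, the SIMPLE_* macros below apparently require a scale
     * transform together with a nearest or bilinear filter in their source
     * flag words.
     */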

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
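    /* Each SIMPLE_NEAREST_FAST_PATH invocation expands to four
     * pixman_fast_path_t entries, one per source repeat mode, dispatching
     * to the _cover, _none, _pad and _normal variants of the named
     * scanline function (e.g.
     * fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER).
     */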

    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
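    /* The SOLID_MASK variants register the same four repeat modes but with
     * the `solid' pseudo-format in the mask slot, so the 8888_n_8888
     * workers scale the source while multiplying it by a single constant
     * mask value.
     */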
6272 | ||||
6273 | SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888), | |||
6274 | SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888), | |||
6275 | SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888), | |||
6276 | SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888), | |||
6277 | SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888), | |||
6278 | SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888), | |||
6279 | ||||
6280 | SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), | |||
6281 | SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), | |||
6282 | SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), | |||
6283 | SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), | |||
6284 | SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), | |||
6285 | SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), | |||
6286 | ||||
6287 | SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), | |||
6288 | SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), | |||
6289 | SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), | |||
6290 | SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), | |||
6291 | ||||
6292 | SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), | |||
6293 | SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), | |||
6294 | SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), | |||
6295 | SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), | |||
6296 | ||||
6297 | SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888), | |||
6298 | SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888), | |||
6299 | SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888), | |||
6300 | SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888), | |||
6301 | ||||
6302 | { PIXMAN_OP_NONE }, | |||
6303 | }; | |||
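/* Each SIMPLE_NEAREST_* / SIMPLE_BILINEAR_* entry in the table above is a
 * macro that expands to one pixman_fast_path_t initializer per repeat mode
 * (cover, none, pad, normal), pairing the source/mask/destination formats
 * with the matching fast_composite_scaled_* worker.  Roughly, one expanded
 * entry looks like this (flag values elided):
 *
 *     { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, <src flags>,
 *       PIXMAN_null,   0,
 *       PIXMAN_a8r8g8b8, <dest flags>,
 *       fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC },
 */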
6304 | ||||
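/* Fetch iterator for x8r8g8b8 source images: the pixel layout already
 * matches a8r8g8b8, so fetching only needs to force the alpha byte to 0xff.
 * Like the other fetchers below, it runs a scalar loop until dst is 16-byte
 * aligned, then a four-pixel SSE2 loop, then a scalar tail. */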
6305 | static uint32_t * | |||
6306 | sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) | |||
6307 | { | |||
6308 | int w = iter->width; | |||
6309 | __m128i ff000000 = mask_ff000000; | |||
6310 | uint32_t *dst = iter->buffer; | |||
6311 | uint32_t *src = (uint32_t *)iter->bits; | |||
6312 | ||||
6313 | iter->bits += iter->stride; | |||
6314 | ||||
6315 | while (w && ((uintptr_t)dst) & 0x0f) | |||
6316 | { | |||
6317 | *dst++ = (*src++) | 0xff000000; | |||
6318 | w--; | |||
6319 | } | |||
6320 | ||||
6321 | while (w >= 4) | |||
6322 | { | |||
6323 | save_128_aligned ( | |||
6324 | (__m128i *)dst, _mm_or_si128 ( | |||
6325 | load_128_unaligned ((__m128i *)src), ff000000)); | |||
6326 | ||||
6327 | dst += 4; | |||
6328 | src += 4; | |||
6329 | w -= 4; | |||
6330 | } | |||
6331 | ||||
6332 | while (w) | |||
6333 | { | |||
6334 | *dst++ = (*src++) | 0xff000000; | |||
6335 | w--; | |||
6336 | } | |||
6337 | ||||
6338 | return iter->buffer; | |||
6339 | } | |||
6340 | ||||
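/* Fetch iterator for r5g6b5 source images: pixels are widened to 8888 with
 * convert_0565_to_8888() / unpack_565_to_8888() and given an opaque alpha
 * byte, eight pixels per SSE2 iteration. */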
6341 | static uint32_t * | |||
6342 | sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) | |||
6343 | { | |||
6344 | int w = iter->width; | |||
6345 | uint32_t *dst = iter->buffer; | |||
6346 | uint16_t *src = (uint16_t *)iter->bits; | |||
6347 | __m128i ff000000 = mask_ff000000; | |||
6348 | ||||
6349 | iter->bits += iter->stride; | |||
6350 | ||||
6351 | while (w && ((uintptr_t)dst) & 0x0f) | |||
6352 | { | |||
6353 | uint16_t s = *src++; | |||
6354 | ||||
6355 | *dst++ = convert_0565_to_8888 (s); | |||
6356 | w--; | |||
6357 | } | |||
6358 | ||||
6359 | while (w >= 8) | |||
6360 | { | |||
6361 | __m128i lo, hi, s; | |||
6362 | ||||
6363 | s = _mm_loadu_si128 ((__m128i *)src); | |||
6364 | ||||
6365 | lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ())); | |||
6366 | hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ())); | |||
6367 | ||||
6368 | save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000)); | |||
6369 | save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000)); | |||
6370 | ||||
6371 | dst += 8; | |||
6372 | src += 8; | |||
6373 | w -= 8; | |||
6374 | } | |||
6375 | ||||
6376 | while (w) | |||
6377 | { | |||
6378 | uint16_t s = *src++; | |||
6379 | ||||
6380 | *dst++ = convert_0565_to_8888 (s); | |||
6381 | w--; | |||
6382 | } | |||
6383 | ||||
6384 | return iter->buffer; | |||
6385 | } | |||
6386 | ||||
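/* Fetch iterator for a8 source images: each alpha byte must land in bits
 * 24-31 of the destination pixel.  Unpacking the source twice against zero
 * (bytes -> words -> dwords), with the source as the high half each time,
 * produces exactly that layout, 16 pixels per iteration. */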
6387 | static uint32_t * | |||
6388 | sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) | |||
6389 | { | |||
6390 | int w = iter->width; | |||
6391 | uint32_t *dst = iter->buffer; | |||
6392 | uint8_t *src = iter->bits; | |||
6393 | __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6; | |||
6394 | ||||
6395 | iter->bits += iter->stride; | |||
6396 | ||||
6397 | while (w && (((uintptr_t)dst) & 15)) | |||
6398 | { | |||
6399 | *dst++ = (uint32_t)(*(src++)) << 24; | |||
6400 | w--; | |||
6401 | } | |||
6402 | ||||
6403 | while (w >= 16) | |||
6404 | { | |||
6405 | xmm0 = _mm_loadu_si128((__m128i *)src); | |||
6406 | ||||
6407 | xmm1 = _mm_unpacklo_epi8 (_mm_setzero_si128(), xmm0); | |||
6408 | xmm2 = _mm_unpackhi_epi8 (_mm_setzero_si128(), xmm0); | |||
6409 | xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1); | |||
6410 | xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1); | |||
6411 | xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2); | |||
6412 | xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2); | |||
6413 | ||||
6414 | _mm_store_si128(((__m128i *)(dst + 0)), xmm3); | |||
6415 | _mm_store_si128(((__m128i *)(dst + 4)), xmm4); | |||
6416 | _mm_store_si128(((__m128i *)(dst + 8)), xmm5); | |||
6417 | _mm_store_si128(((__m128i *)(dst + 12)), xmm6); | |||
6418 | ||||
6419 | dst += 16; | |||
6420 | src += 16; | |||
6421 | w -= 16; | |||
6422 | } | |||
6423 | ||||
6424 | while (w) | |||
6425 | { | |||
6426 | *dst++ = (uint32_t)(*(src++)) << 24; | |||
6427 | w--; | |||
6428 | } | |||
6429 | ||||
6430 | return iter->buffer; | |||
6431 | } | |||
6432 | ||||
6433 | #define IMAGE_FLAGS                                                   \ | |||
6434 |     (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |               \ | |||
6435 |      FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) | |||
6436 | ||||
6437 | static const pixman_iter_info_t sse2_iters[] = | |||
6438 | { | |||
6439 | { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, | |||
6440 | _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL | |||
6441 | }, | |||
6442 | { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, | |||
6443 | _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL | |||
6444 | }, | |||
6445 | { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, | |||
6446 | _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL | |||
6447 | }, | |||
6448 | { PIXMAN_null }, | |||
6449 | }; | |||
6450 | ||||
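/* On 32-bit x86, some ABIs and callers only guarantee 4-byte stack alignment
 * at function entry, so force_align_arg_pointer makes GCC re-align the stack
 * here before any 16-byte SSE2 spills can occur. */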
6451 | #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) | |||
6452 | __attribute__((__force_align_arg_pointer__)) | |||
6453 | #endif | |||
6454 | pixman_implementation_t * | |||
6455 | _pixman_implementation_create_sse2 (pixman_implementation_t *fallback) | |||
6456 | { | |||
6457 | pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths); | |||
6458 | ||||
6459 | /* SSE2 constants */ | |||
6460 | mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000); | |||
6461 | mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000); | |||
6462 | mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0); | |||
6463 | mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f); | |||
6464 | mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000); | |||
6465 | mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00); | |||
6466 | mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8); | |||
6467 | mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0); | |||
6468 | mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000); | |||
6469 | mask_0080 = create_mask_16_128 (0x0080); | |||
6470 | mask_00ff = create_mask_16_128 (0x00ff); | |||
6471 | mask_0101 = create_mask_16_128 (0x0101); | |||
6472 | mask_ffff = create_mask_16_128 (0xffff); | |||
6473 | mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000); | |||
6474 | mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000); | |||
6475 | mask_565_rb = create_mask_2x32_128 (0x00f800f8, 0x00f800f8); | |||
6476 | mask_565_pack_multiplier = create_mask_2x32_128 (0x20000004, 0x20000004); | |||
6477 | ||||
6478 | /* Set up function pointers */ | |||
6479 | imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u; | |||
6480 | imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u; | |||
6481 | imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u; | |||
6482 | imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u; | |||
6483 | imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u; | |||
6484 | imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u; | |||
6485 | imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u; | |||
6486 | imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u; | |||
6487 | imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u; | |||
6488 | imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u; | |||
6489 | ||||
6490 | imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u; | |||
6491 | ||||
6492 | imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca; | |||
6493 | imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca; | |||
6494 | imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca; | |||
6495 | imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca; | |||
6496 | imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca; | |||
6497 | imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca; | |||
6498 | imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca; | |||
6499 | imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca; | |||
6500 | imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca; | |||
6501 | imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca; | |||
6502 | imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca; | |||
6503 | ||||
6504 | imp->blt = sse2_blt; | |||
6505 | imp->fill = sse2_fill; | |||
6506 | ||||
6507 | imp->iter_info = sse2_iters; | |||
6508 | ||||
6509 | return imp; | |||
6510 | } |
1 | /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== | |||
2 | * | |||
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | * See https://llvm.org/LICENSE.txt for license information. | |||
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | * | |||
7 | *===-----------------------------------------------------------------------=== | |||
8 | */ | |||
9 | ||||
10 | #ifndef __EMMINTRIN_H | |||
11 | #define __EMMINTRIN_H | |||
12 | ||||
13 | #if !defined(__i386__) && !defined(__x86_64__) | |||
14 | #error "This header is only meant to be used on x86 and x64 architecture" | |||
15 | #endif | |||
16 | ||||
17 | #include <xmmintrin.h> | |||
18 | ||||
19 | typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); | |||
20 | typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); | |||
21 | ||||
22 | typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); | |||
23 | typedef long long __m128i_u | |||
24 | __attribute__((__vector_size__(16), __aligned__(1))); | |||
25 | ||||
26 | /* Type defines. */ | |||
27 | typedef double __v2df __attribute__((__vector_size__(16))); | |||
28 | typedef long long __v2di __attribute__((__vector_size__(16))); | |||
29 | typedef short __v8hi __attribute__((__vector_size__(16))); | |||
30 | typedef char __v16qi __attribute__((__vector_size__(16))); | |||
31 | ||||
32 | /* Unsigned types */ | |||
33 | typedef unsigned long long __v2du __attribute__((__vector_size__(16))); | |||
34 | typedef unsigned short __v8hu __attribute__((__vector_size__(16))); | |||
35 | typedef unsigned char __v16qu __attribute__((__vector_size__(16))); | |||
36 | ||||
37 | /* We need an explicitly signed variant for char. Note that this shouldn't | |||
38 | * appear in the interface though. */ | |||
39 | typedef signed char __v16qs __attribute__((__vector_size__(16))); | |||
40 | ||||
41 | #ifdef __SSE2__ | |||
42 | /* Both _Float16 and __bf16 require SSE2 being enabled. */ | |||
43 | typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16))); | |||
44 | typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16))); | |||
45 | typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1))); | |||
46 | ||||
47 | typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16))); | |||
48 | typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); | |||
49 | #endif | |||
50 | ||||
51 | /* Define the default attributes for the functions in this file. */ | |||
52 | #define __DEFAULT_FN_ATTRS \ | |||
53 | __attribute__((__always_inline__, __nodebug__, \ | |||
54 | __target__("sse2,no-evex512"), __min_vector_width__(128))) | |||
55 | #define __DEFAULT_FN_ATTRS_MMX \ | |||
56 | __attribute__((__always_inline__, __nodebug__, \ | |||
57 | __target__("mmx,sse2,no-evex512"), __min_vector_width__(64))) | |||
58 | ||||
59 | /// Adds lower double-precision values in both operands and returns the | |||
60 | /// sum in the lower 64 bits of the result. The upper 64 bits of the result | |||
61 | /// are copied from the upper double-precision value of the first operand. | |||
62 | /// | |||
63 | /// \headerfile <x86intrin.h> | |||
64 | /// | |||
65 | /// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction. | |||
66 | /// | |||
67 | /// \param __a | |||
68 | /// A 128-bit vector of [2 x double] containing one of the source operands. | |||
69 | /// \param __b | |||
70 | /// A 128-bit vector of [2 x double] containing one of the source operands. | |||
71 | /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the | |||
72 | /// sum of the lower 64 bits of both operands. The upper 64 bits are copied | |||
73 | /// from the upper 64 bits of the first source operand. | |||
74 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, | |||
75 | __m128d __b) { | |||
76 | __a[0] += __b[0]; | |||
77 | return __a; | |||
78 | } | |||
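/* A minimal usage sketch (illustrative only, not part of this header) of the
 * scalar "sd" semantics described above -- only lane 0 is summed, lane 1 is
 * carried over from the first operand:
 *
 *     __m128d a = _mm_set_pd(10.0, 1.0);   // a = { 1.0, 10.0 }
 *     __m128d b = _mm_set_pd(20.0, 2.0);   // b = { 2.0, 20.0 }
 *     __m128d r = _mm_add_sd(a, b);        // r = { 3.0, 10.0 }
 */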
79 | ||||
80 | /// Adds two 128-bit vectors of [2 x double]. | |||
81 | /// | |||
82 | /// \headerfile <x86intrin.h> | |||
83 | /// | |||
84 | /// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction. | |||
85 | /// | |||
86 | /// \param __a | |||
87 | /// A 128-bit vector of [2 x double] containing one of the source operands. | |||
88 | /// \param __b | |||
89 | /// A 128-bit vector of [2 x double] containing one of the source operands. | |||
90 | /// \returns A 128-bit vector of [2 x double] containing the sums of both | |||
91 | /// operands. | |||
92 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, | |||
93 | __m128d __b) { | |||
94 | return (__m128d)((__v2df)__a + (__v2df)__b); | |||
95 | } | |||
96 | ||||
97 | /// Subtracts the lower double-precision value of the second operand | |||
98 | /// from the lower double-precision value of the first operand and returns | |||
99 | /// the difference in the lower 64 bits of the result. The upper 64 bits of | |||
100 | /// the result are copied from the upper double-precision value of the first | |||
101 | /// operand. | |||
102 | /// | |||
103 | /// \headerfile <x86intrin.h> | |||
104 | /// | |||
105 | /// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction. | |||
106 | /// | |||
107 | /// \param __a | |||
108 | /// A 128-bit vector of [2 x double] containing the minuend. | |||
109 | /// \param __b | |||
110 | /// A 128-bit vector of [2 x double] containing the subtrahend. | |||
111 | /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the | |||
112 | /// difference of the lower 64 bits of both operands. The upper 64 bits are | |||
113 | /// copied from the upper 64 bits of the first source operand. | |||
114 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, | |||
115 | __m128d __b) { | |||
116 | __a[0] -= __b[0]; | |||
117 | return __a; | |||
118 | } | |||
119 | ||||
120 | /// Subtracts two 128-bit vectors of [2 x double]. | |||
121 | /// | |||
122 | /// \headerfile <x86intrin.h> | |||
123 | /// | |||
124 | /// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction. | |||
125 | /// | |||
126 | /// \param __a | |||
127 | /// A 128-bit vector of [2 x double] containing the minuend. | |||
128 | /// \param __b | |||
129 | /// A 128-bit vector of [2 x double] containing the subtrahend. | |||
130 | /// \returns A 128-bit vector of [2 x double] containing the differences between | |||
131 | /// both operands. | |||
132 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, | |||
133 | __m128d __b) { | |||
134 | return (__m128d)((__v2df)__a - (__v2df)__b); | |||
135 | } | |||
136 | ||||
137 | /// Multiplies lower double-precision values in both operands and returns | |||
138 | /// the product in the lower 64 bits of the result. The upper 64 bits of the | |||
139 | /// result are copied from the upper double-precision value of the first | |||
140 | /// operand. | |||
141 | /// | |||
142 | /// \headerfile <x86intrin.h> | |||
143 | /// | |||
144 | /// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction. | |||
145 | /// | |||
146 | /// \param __a | |||
147 | /// A 128-bit vector of [2 x double] containing one of the source operands. | |||
148 | /// \param __b | |||
149 | /// A 128-bit vector of [2 x double] containing one of the source operands. | |||
150 | /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the | |||
151 | /// product of the lower 64 bits of both operands. The upper 64 bits are | |||
152 | /// copied from the upper 64 bits of the first source operand. | |||
153 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, | |||
154 | __m128d __b) { | |||
155 | __a[0] *= __b[0]; | |||
156 | return __a; | |||
157 | } | |||
158 | ||||
159 | /// Multiplies two 128-bit vectors of [2 x double]. | |||
160 | /// | |||
161 | /// \headerfile <x86intrin.h> | |||
162 | /// | |||
163 | /// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction. | |||
164 | /// | |||
165 | /// \param __a | |||
166 | /// A 128-bit vector of [2 x double] containing one of the operands. | |||
167 | /// \param __b | |||
168 | /// A 128-bit vector of [2 x double] containing one of the operands. | |||
169 | /// \returns A 128-bit vector of [2 x double] containing the products of both | |||
170 | /// operands. | |||
171 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, | |||
172 | __m128d __b) { | |||
173 | return (__m128d)((__v2df)__a * (__v2df)__b); | |||
174 | } | |||
175 | ||||
176 | /// Divides the lower double-precision value of the first operand by the | |||
177 | /// lower double-precision value of the second operand and returns the | |||
178 | /// quotient in the lower 64 bits of the result. The upper 64 bits of the | |||
179 | /// result are copied from the upper double-precision value of the first | |||
180 | /// operand. | |||
181 | /// | |||
182 | /// \headerfile <x86intrin.h> | |||
183 | /// | |||
184 | /// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction. | |||
185 | /// | |||
186 | /// \param __a | |||
187 | /// A 128-bit vector of [2 x double] containing the dividend. | |||
188 | /// \param __b | |||
189 | /// A 128-bit vector of [2 x double] containing the divisor. | |||
190 | /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the | |||
191 | /// quotient of the lower 64 bits of both operands. The upper 64 bits are | |||
192 | /// copied from the upper 64 bits of the first source operand. | |||
193 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, | |||
194 | __m128d __b) { | |||
195 | __a[0] /= __b[0]; | |||
196 | return __a; | |||
197 | } | |||
198 | ||||
199 | /// Performs an element-by-element division of two 128-bit vectors of | |||
200 | /// [2 x double]. | |||
201 | /// | |||
202 | /// \headerfile <x86intrin.h> | |||
203 | /// | |||
204 | /// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction. | |||
205 | /// | |||
206 | /// \param __a | |||
207 | /// A 128-bit vector of [2 x double] containing the dividend. | |||
208 | /// \param __b | |||
209 | /// A 128-bit vector of [2 x double] containing the divisor. | |||
210 | /// \returns A 128-bit vector of [2 x double] containing the quotients of both | |||
211 | /// operands. | |||
212 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, | |||
213 | __m128d __b) { | |||
214 | return (__m128d)((__v2df)__a / (__v2df)__b); | |||
215 | } | |||
216 | ||||
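/// Usage sketch (editorial addition, not part of the original header): a
/// minimal program contrasting the scalar (_sd) and packed (_pd) arithmetic
/// forms documented above. Assumes an x86-64 target where SSE2 is available;
/// all variable names and values are illustrative.
/// \code
///   #include <emmintrin.h>
///   #include <stdio.h>
///
///   int main(void) {
///     __m128d a = _mm_set_pd(8.0, 6.0); /* lane 1 = 8.0, lane 0 = 6.0 */
///     __m128d b = _mm_set_pd(2.0, 3.0); /* lane 1 = 2.0, lane 0 = 3.0 */
///     double out[2];
///
///     _mm_storeu_pd(out, _mm_div_sd(a, b)); /* lane 0 divided, lane 1 kept from a */
///     printf("div_sd: %g %g\n", out[0], out[1]); /* prints: 2 8 */
///
///     _mm_storeu_pd(out, _mm_div_pd(a, b)); /* both lanes divided */
///     printf("div_pd: %g %g\n", out[0], out[1]); /* prints: 2 4 */
///     return 0;
///   }
/// \endcode
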
/// Calculates the square root of the lower double-precision value of
/// the second operand and returns it in the lower 64 bits of the result.
/// The upper 64 bits of the result are copied from the upper
/// double-precision value of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the operands. The
///    upper 64 bits of this operand are copied to the upper 64 bits of the
///    result.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the operands. The
///    square root is calculated using the lower 64 bits of this operand.
/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
///    square root of the lower 64 bits of operand \a __b, and whose upper 64
///    bits are copied from the upper 64 bits of operand \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a,
                                                         __m128d __b) {
  __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);
  return __extension__(__m128d){__c[0], __a[1]};
}

/// Calculates the square root of each of the two values stored in a
/// 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector of [2 x double] containing the square roots of the
///    values in the operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) {
  return __builtin_ia32_sqrtpd((__v2df)__a);
}

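/// Usage sketch (editorial addition): shows how _mm_sqrt_sd merges the square
/// root of \a __b's low lane with \a __a's untouched high lane. Assumes SSE2;
/// names and values are illustrative.
/// \code
///   #include <emmintrin.h>
///   #include <stdio.h>
///
///   int main(void) {
///     __m128d a = _mm_set_pd(7.0, 1.0);  /* high lane 7.0 survives */
///     __m128d b = _mm_set_pd(99.0, 9.0); /* sqrt taken of low lane 9.0 */
///     double out[2];
///     _mm_storeu_pd(out, _mm_sqrt_sd(a, b));
///     printf("%g %g\n", out[0], out[1]); /* prints: 3 7 */
///     return 0;
///   }
/// \endcode
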
/// Compares lower 64-bit double-precision values of both operands, and
/// returns the lesser of the pair of values in the lower 64 bits of the
/// result. The upper 64 bits of the result are copied from the upper
/// double-precision value of the first operand.
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the operands. The
///    lower 64 bits of this operand are used in the comparison.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the operands. The
///    lower 64 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
///    minimum value between both operands. The upper 64 bits are copied from
///    the upper 64 bits of the first source operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a,
                                                        __m128d __b) {
  return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
}

/// Performs element-by-element comparison of the two 128-bit vectors of
/// [2 x double] and returns a vector containing the lesser of each pair of
/// values.
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the operands.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the operands.
/// \returns A 128-bit vector of [2 x double] containing the minimum values
///    between both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a,
                                                        __m128d __b) {
  return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
}

/// Compares lower 64-bit double-precision values of both operands, and
/// returns the greater of the pair of values in the lower 64 bits of the
/// result. The upper 64 bits of the result are copied from the upper
/// double-precision value of the first operand.
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the operands. The
///    lower 64 bits of this operand are used in the comparison.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the operands. The
///    lower 64 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
///    maximum value between both operands. The upper 64 bits are copied from
///    the upper 64 bits of the first source operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a,
                                                        __m128d __b) {
  return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
}

/// Performs element-by-element comparison of the two 128-bit vectors of
/// [2 x double] and returns a vector containing the greater of each pair
/// of values.
///
/// If either value in a comparison is NaN, returns the value from \a __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the operands.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the operands.
/// \returns A 128-bit vector of [2 x double] containing the maximum values
///    between both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a,
                                                        __m128d __b) {
  return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
}

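/// Usage sketch (editorial addition): the NaN rule documented above means
/// min/max are not symmetric in their operands; placing a "fallback" value in
/// \a __b makes NaN inputs resolve to it. Assumes SSE2 and the C99 NAN macro;
/// names are illustrative.
/// \code
///   #include <emmintrin.h>
///   #include <math.h>
///   #include <stdio.h>
///
///   int main(void) {
///     __m128d x = _mm_set_pd(NAN, 5.0);     /* lane 1 is NaN */
///     __m128d fallback = _mm_set1_pd(0.0);
///     double out[2];
///     /* lane 0: min(5, 0) = 0; lane 1: NaN comparison returns __b = 0 */
///     _mm_storeu_pd(out, _mm_min_pd(x, fallback));
///     printf("%g %g\n", out[0], out[1]); /* prints: 0 0 */
///     return 0;
///   }
/// \endcode
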
/// Performs a bitwise AND of two 128-bit vectors of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the source operands.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the source operands.
/// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
///    values between both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a,
                                                        __m128d __b) {
  return (__m128d)((__v2du)__a & (__v2du)__b);
}

/// Performs a bitwise AND of two 128-bit vectors of [2 x double], using
/// the one's complement of the values contained in the first source operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing the left source operand. The
///    one's complement of this value is used in the bitwise AND.
/// \param __b
///    A 128-bit vector of [2 x double] containing the right source operand.
/// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
///    values in the second operand and the one's complement of the first
///    operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)(~(__v2du)__a & (__v2du)__b);
}

/// Performs a bitwise OR of two 128-bit vectors of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the source operands.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the source operands.
/// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the
///    values between both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a,
                                                       __m128d __b) {
  return (__m128d)((__v2du)__a | (__v2du)__b);
}

/// Performs a bitwise XOR of two 128-bit vectors of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the source operands.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the source operands.
/// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the
///    values between both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a,
                                                        __m128d __b) {
  return (__m128d)((__v2du)__a ^ (__v2du)__b);
}

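/// Usage sketch (editorial addition): the bitwise forms are commonly used to
/// manipulate the sign bit of doubles; here a branch-free absolute value via
/// _mm_andnot_pd with a sign-bit mask. Assumes SSE2; names and values are
/// illustrative.
/// \code
///   #include <emmintrin.h>
///   #include <stdio.h>
///
///   int main(void) {
///     /* -0.0 has only the sign bit set in each lane */
///     const __m128d sign = _mm_set1_pd(-0.0);
///     __m128d v = _mm_set_pd(-2.5, 3.0);
///     double out[2];
///     _mm_storeu_pd(out, _mm_andnot_pd(sign, v)); /* ~sign & v clears sign bits */
///     printf("%g %g\n", out[0], out[1]); /* prints: 3 2.5 */
///     return 0;
///   }
/// \endcode
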
/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] for equality.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a,
                                                          __m128d __b) {
  return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
}

/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are less than those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a,
                                                          __m128d __b) {
  return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
}

/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are less than or equal to those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a,
                                                          __m128d __b) {
  return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
}

/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are greater than those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a,
                                                          __m128d __b) {
  return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
}

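/// Editorial note with sketch: as the documentation above indicates,
/// _mm_cmpgt_pd maps onto CMPLTPD with the operands swapped (visible in the
/// body above), since SSE2 has no dedicated greater-than predicate. The
/// resulting all-ones/all-zeros lane masks are typically consumed with
/// _mm_movemask_pd or used for blending. Assumes SSE2; names and values are
/// illustrative.
/// \code
///   #include <emmintrin.h>
///   #include <stdio.h>
///
///   int main(void) {
///     __m128d a = _mm_set_pd(1.0, 4.0);
///     __m128d b = _mm_set_pd(2.0, 3.0);
///     __m128d m = _mm_cmpgt_pd(a, b); /* lane 0: 4>3 true; lane 1: 1>2 false */
///     printf("mask bits: %d\n", _mm_movemask_pd(m)); /* prints: 1 (0b01) */
///     return 0;
///   }
/// \endcode
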
/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are greater than or equal to those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a,
                                                          __m128d __b) {
  return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
}

/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are ordered with respect to those in the second operand.
///
/// A pair of double-precision values are ordered with respect to each
/// other if neither value is a NaN. Each comparison returns 0x0 for false,
/// 0xFFFFFFFFFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
}

/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are unordered with respect to those in the second operand.
///
/// A pair of double-precision values are unordered with respect to each
/// other if one or both values are NaN. Each comparison returns 0x0 for
/// false, 0xFFFFFFFFFFFFFFFF for true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>
/// instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a,
                                                             __m128d __b) {
  return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
}

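/// Usage sketch (editorial addition): _mm_cmpord_pd yields an all-ones mask
/// exactly in lanes where both inputs are non-NaN, which makes it a natural
/// NaN filter; combined with _mm_and_pd it zeroes NaN lanes. Assumes SSE2 and
/// the C99 NAN macro; names and values are illustrative.
/// \code
///   #include <emmintrin.h>
///   #include <math.h>
///   #include <stdio.h>
///
///   int main(void) {
///     __m128d v = _mm_set_pd(NAN, 1.5);
///     __m128d ord = _mm_cmpord_pd(v, v); /* all-ones where v is not NaN */
///     double out[2];
///     _mm_storeu_pd(out, _mm_and_pd(v, ord)); /* NaN lanes become +0.0 */
///     printf("%g %g\n", out[0], out[1]); /* prints: 1.5 0 */
///     return 0;
///   }
/// \endcode
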
/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are unequal to those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
}

/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are not less than those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
}

/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are not less than or equal to those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
}

/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are not greater than those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
}

/// Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are not greater than or equal to those in the second operand.
///
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \param __b
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
}

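/// Editorial note with sketch: the negated forms above are not redundant with
/// the positive forms; they differ precisely on NaN. For example, _mm_cmpge_pd
/// returns false for a NaN lane while _mm_cmpnlt_pd returns true, even though
/// both read as "not less than". Assumes SSE2 and the C99 NAN macro; names
/// are illustrative.
/// \code
///   #include <emmintrin.h>
///   #include <math.h>
///   #include <stdio.h>
///
///   int main(void) {
///     __m128d a = _mm_set1_pd(NAN);
///     __m128d b = _mm_set1_pd(0.0);
///     printf("cmpge:  %d\n", _mm_movemask_pd(_mm_cmpge_pd(a, b)));  /* 0 */
///     printf("cmpnlt: %d\n", _mm_movemask_pd(_mm_cmpnlt_pd(a, b))); /* 3 */
///     return 0;
///   }
/// \endcode
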
/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] for equality.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a,
                                                          __m128d __b) {
  return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than the corresponding value in
/// the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a,
                                                          __m128d __b) {
  return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than or equal to the
/// corresponding value in the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a,
                                                          __m128d __b) {
  return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than the corresponding value
/// in the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a,
                                                          __m128d __b) {
  __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
  return __extension__(__m128d){__c[0], __a[1]};
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than or equal to the
/// corresponding value in the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns false.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a,
                                                          __m128d __b) {
  __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
  return __extension__(__m128d){__c[0], __a[1]};
}

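/// Usage sketch (editorial addition): the scalar compares return their
/// true/false mask in lane 0 and pass \a __a's upper lane through, so the low
/// bit of _mm_movemask_pd isolates the verdict. Assumes SSE2; names and
/// values are illustrative.
/// \code
///   #include <emmintrin.h>
///   #include <stdio.h>
///
///   int main(void) {
///     __m128d a = _mm_set_pd(42.0, 4.0);
///     __m128d b = _mm_set_pd(-1.0, 3.0);
///     __m128d c = _mm_cmpgt_sd(a, b); /* lane 0: all-ones; lane 1: 42.0 from a */
///     int gt = _mm_movemask_pd(c) & 1; /* sign bit of lane 0 */
///     printf("4 > 3: %d\n", gt); /* prints: 1 */
///     return 0;
///   }
/// \endcode
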
/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is ordered with respect to the
/// corresponding value in the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair
/// of double-precision values are ordered with respect to each other if
/// neither value is a NaN.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is unordered with respect to the
/// corresponding value in the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair
/// of double-precision values are unordered with respect to each other if
/// one or both values are NaN.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>
/// instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a,
                                                             __m128d __b) {
  return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is unequal to the corresponding value in
/// the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is not less than the corresponding
/// value in the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is not less than or equal to the
/// corresponding value in the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a,
                                                           __m128d __b) {
  return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is not greater than the corresponding
/// value in the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a,
                                                           __m128d __b) {
  __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
  return __extension__(__m128d){__c[0], __a[1]};
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is not greater than or equal to the
/// corresponding value in the second parameter.
///
/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
/// If either value in a comparison is NaN, returns true.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contain the comparison
///    result. The upper 64 bits are copied from the upper 64 bits of \a __a.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a,
                                                           __m128d __b) {
  __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
  return __extension__(__m128d){__c[0], __a[1]};
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] for equality.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a,
                                                       __m128d __b) {
  return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than the corresponding value in
/// the second parameter.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a,
                                                       __m128d __b) {
  return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than or equal to the
/// corresponding value in the second parameter.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a,
                                                       __m128d __b) {
  return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than the corresponding value
/// in the second parameter.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a,
                                                       __m128d __b) {
  return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than or equal to the
/// corresponding value in the second parameter.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a,
                                                       __m128d __b) {
  return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is unequal to the corresponding value in
/// the second parameter.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 1.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a,
                                                        __m128d __b) {
  return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
}

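/// Usage sketch (editorial addition): unlike the _mm_cmp*_sd family, the comi
/// functions return a plain int, so they slot directly into scalar control
/// flow. Assumes SSE2; names and values are illustrative.
/// \code
///   #include <emmintrin.h>
///   #include <stdio.h>
///
///   int main(void) {
///     __m128d a = _mm_set_sd(2.0); /* low lane 2.0, high lane zeroed */
///     __m128d b = _mm_set_sd(1.0);
///     if (_mm_comigt_sd(a, b))
///       printf("2.0 > 1.0\n");
///     return 0;
///   }
/// \endcode
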
/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] for equality.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a,
                                                        __m128d __b) {
  return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than the corresponding value in
/// the second parameter.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a,
                                                        __m128d __b) {
  return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than or equal to the
/// corresponding value in the second parameter.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a,
                                                        __m128d __b) {
  return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than the corresponding value
/// in the second parameter.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a,
                                                        __m128d __b) {
  return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than or equal to the
/// corresponding value in the second parameter.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a,
                                                        __m128d __b) {
  return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
}

/// Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is unequal to the corresponding value in
/// the second parameter.
///
/// The comparison returns 0 for false, 1 for true. If either value in a
/// comparison is NaN, returns 1.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __b.
/// \param __b
///    A 128-bit vector of [2 x double]. The lower double-precision value is
///    compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a,
                                                         __m128d __b) {
  return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
}

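/// Editorial note with sketch: the comi and ucomi forms compute the same 0/1
/// answers; they differ in exception signaling. COMISD raises the
/// floating-point invalid exception for quiet and signaling NaNs alike, while
/// UCOMISD raises it only for signaling NaNs. A sketch using C99 <fenv.h>;
/// observing the flag reliably requires compiler support for FENV_ACCESS,
/// which varies, so treat this as illustrative only.
/// \code
///   #include <emmintrin.h>
///   #include <fenv.h>
///   #include <math.h>
///   #include <stdio.h>
///
///   int main(void) {
///     __m128d x = _mm_set_sd(1.0);
///     __m128d qnan = _mm_set_sd(NAN); /* NAN is a quiet NaN */
///
///     feclearexcept(FE_INVALID);
///     (void)_mm_ucomieq_sd(x, qnan);
///     printf("ucomi invalid: %d\n", !!fetestexcept(FE_INVALID)); /* 0 */
///
///     feclearexcept(FE_INVALID);
///     (void)_mm_comieq_sd(x, qnan);
///     printf("comi invalid:  %d\n", !!fetestexcept(FE_INVALID)); /* 1 */
///     return 0;
///   }
/// \endcode
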
/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of [2 x double] into two single-precision floating-point
/// values, returned in the lower 64 bits of a 128-bit vector of [4 x float].
/// The upper 64 bits of the result vector are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
///    converted values. The upper 64 bits are set to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) {
  return __builtin_ia32_cvtpd2ps((__v2df)__a);
}

/// Converts the lower two single-precision floating-point elements of a
/// 128-bit vector of [4 x float] into two double-precision floating-point
/// values, returned in a 128-bit vector of [2 x double]. The upper two
/// elements of the input vector are unused.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float]. The lower two single-precision
///    floating-point elements are converted to double-precision values. The
///    upper two elements are unused.
/// \returns A 128-bit vector of [2 x double] containing the converted values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) {
  return (__m128d) __builtin_convertvector(
      __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
}

/// Converts the lower two integer elements of a 128-bit vector of
/// [4 x i32] into two double-precision floating-point values, returned in a
/// 128-bit vector of [2 x double].
///
/// The upper two elements of the input vector are unused.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction.
///
/// \param __a
///    A 128-bit integer vector of [4 x i32]. The lower two integer elements
///    are converted to double-precision values. The upper two elements are
///    unused.
/// \returns A 128-bit vector of [2 x double] containing the converted values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) {
  return (__m128d) __builtin_convertvector(
      __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
}

/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper
/// 64 bits of the result vector are set to zero.
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
///    converted values. The upper 64 bits are set to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a) {
  return __builtin_ia32_cvtpd2dq((__v2df)__a);
}

1341 | /// Converts the low-order element of a 128-bit vector of [2 x double] | |||
1342 | /// into a 32-bit signed integer value. | |||
1343 | /// | |||
1344 | /// If the converted value does not fit in a 32-bit integer, raises a | |||
1345 | /// floating-point invalid exception. If the exception is masked, returns | |||
1346 | /// the most negative integer. | |||
1347 | /// | |||
1348 | /// \headerfile <x86intrin.h> | |||
1349 | /// | |||
1350 | /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. | |||
1351 | /// | |||
1352 | /// \param __a | |||
1353 | /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the | |||
1354 | /// conversion. | |||
1355 | /// \returns A 32-bit signed integer containing the converted value. | |||
1356 | static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) { | |||
1357 | return __builtin_ia32_cvtsd2si((__v2df)__a); | |||
1358 | } | |||
1359 | ||||
1360 | /// Converts the lower double-precision floating-point element of a | |||
1361 | /// 128-bit vector of [2 x double], in the second parameter, into a | |||
1362 | /// single-precision floating-point value, returned in the lower 32 bits of a | |||
1363 | /// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are | |||
1364 | /// copied from the upper 96 bits of the first parameter. | |||
1365 | /// | |||
1366 | /// \headerfile <x86intrin.h> | |||
1367 | /// | |||
1368 | /// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction. | |||
1369 | /// | |||
1370 | /// \param __a | |||
1371 | /// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are | |||
1372 | /// copied to the upper 96 bits of the result. | |||
1373 | /// \param __b | |||
1374 | /// A 128-bit vector of [2 x double]. The lower double-precision | |||
1375 | /// floating-point element is used in the conversion. | |||
1376 | /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the | |||
1377 | /// converted value from the second parameter. The upper 96 bits are copied | |||
1378 | /// from the upper 96 bits of the first parameter. | |||
1379 | static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, | |||
1380 | __m128d __b) { | |||
1381 | return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); | |||
1382 | } | |||
1383 | ||||
1384 | /// Converts a 32-bit signed integer value, in the second parameter, into | |||
1385 | /// a double-precision floating-point value, returned in the lower 64 bits of | |||
1386 | /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector | |||
1387 | /// are copied from the upper 64 bits of the first parameter. | |||
1388 | /// | |||
1389 | /// \headerfile <x86intrin.h> | |||
1390 | /// | |||
1391 | /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. | |||
1392 | /// | |||
1393 | /// \param __a | |||
1394 | /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are | |||
1395 | /// copied to the upper 64 bits of the result. | |||
1396 | /// \param __b | |||
1397 | /// A 32-bit signed integer containing the value to be converted. | |||
1398 | /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the | |||
1399 | /// converted value from the second parameter. The upper 64 bits are copied | |||
1400 | /// from the upper 64 bits of the first parameter. | |||
1401 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, | |||
1402 | int __b) { | |||
1403 | __a[0] = __b; | |||
1404 | return __a; | |||
1405 | } | |||
1406 | ||||
1407 | /// Converts the lower single-precision floating-point element of a | |||
1408 | /// 128-bit vector of [4 x float], in the second parameter, into a | |||
1409 | /// double-precision floating-point value, returned in the lower 64 bits of | |||
1410 | /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector | |||
1411 | /// are copied from the upper 64 bits of the first parameter. | |||
1412 | /// | |||
1413 | /// \headerfile <x86intrin.h> | |||
1414 | /// | |||
1415 | /// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction. | |||
1416 | /// | |||
1417 | /// \param __a | |||
1418 | /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are | |||
1419 | /// copied to the upper 64 bits of the result. | |||
1420 | /// \param __b | |||
1421 | /// A 128-bit vector of [4 x float]. The lower single-precision | |||
1422 | /// floating-point element is used in the conversion. | |||
1423 | /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the | |||
1424 | /// converted value from the second parameter. The upper 64 bits are copied | |||
1425 | /// from the upper 64 bits of the first parameter. | |||
1426 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, | |||
1427 | __m128 __b) { | |||
1428 | __a[0] = __b[0]; | |||
1429 | return __a; | |||
1430 | } | |||
1431 | ||||
1432 | /// Converts the two double-precision floating-point elements of a | |||
1433 | /// 128-bit vector of [2 x double] into two signed truncated (rounded | |||
1434 | /// toward zero) 32-bit integer values, returned in the lower 64 bits | |||
1435 | /// of a 128-bit vector of [4 x i32]. | |||
1436 | /// | |||
1437 | /// If a converted value does not fit in a 32-bit integer, raises a | |||
1438 | /// floating-point invalid exception. If the exception is masked, returns | |||
1439 | /// the most negative integer. | |||
1440 | /// | |||
1441 | /// \headerfile <x86intrin.h> | |||
1442 | /// | |||
1443 | /// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> | |||
1444 | /// instruction. | |||
1445 | /// | |||
1446 | /// \param __a | |||
1447 | /// A 128-bit vector of [2 x double]. | |||
1448 | /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the | |||
1449 | /// converted values. The upper 64 bits are set to zero. | |||
1450 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a) { | |||
1451 | return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); | |||
1452 | } | |||
1453 | ||||
1454 | /// Converts the low-order element of a [2 x double] vector into a 32-bit | |||
1455 | /// signed truncated (rounded toward zero) integer value. | |||
1456 | /// | |||
1457 | /// If the converted value does not fit in a 32-bit integer, raises a | |||
1458 | /// floating-point invalid exception. If the exception is masked, returns | |||
1459 | /// the most negative integer. | |||
1460 | /// | |||
1461 | /// \headerfile <x86intrin.h> | |||
1462 | /// | |||
1463 | /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> | |||
1464 | /// instruction. | |||
1465 | /// | |||
1466 | /// \param __a | |||
1467 | /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the | |||
1468 | /// conversion. | |||
1469 | /// \returns A 32-bit signed integer containing the converted value. | |||
1470 | static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) { | |||
1471 | return __builtin_ia32_cvttsd2si((__v2df)__a); | |||
1472 | } | |||
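
/* Usage sketch (illustrative, not part of the original header): contrasts
 * _mm_cvtsd_si32, which rounds according to the current MXCSR rounding mode
 * (round-to-nearest-even by default), with _mm_cvttsd_si32, which always
 * truncates toward zero. The function name __example_cvt_round_vs_trunc is
 * hypothetical. */
static __inline__ void __example_cvt_round_vs_trunc(int *__rounded,
                                                    int *__truncated) {
  __m128d __v = __extension__(__m128d){2.7, 0.0};
  *__rounded = _mm_cvtsd_si32(__v);    /* 3 under round-to-nearest-even */
  *__truncated = _mm_cvttsd_si32(__v); /* 2: the fraction is discarded */
}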

/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
/// returned in a 64-bit vector of [2 x i32].
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction.
///
/// \param __a
/// A 128-bit vector of [2 x double].
/// \returns A 64-bit vector of [2 x i32] containing the converted values.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) {
  return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
}

/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of [2 x double] into two signed truncated (rounded toward
/// zero) 32-bit integer values, returned in a 64-bit vector of [2 x i32].
///
/// If a converted value does not fit in a 32-bit integer, raises a
/// floating-point invalid exception. If the exception is masked, returns
/// the most negative integer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction.
///
/// \param __a
/// A 128-bit vector of [2 x double].
/// \returns A 64-bit vector of [2 x i32] containing the converted values.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) {
  return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
}

/// Converts the two signed 32-bit integer elements of a 64-bit vector of
/// [2 x i32] into two double-precision floating-point values, returned in a
/// 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction.
///
/// \param __a
/// A 64-bit vector of [2 x i32].
/// \returns A 128-bit vector of [2 x double] containing the converted values.
static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a) {
  return __builtin_ia32_cvtpi2pd((__v2si)__a);
}

/// Returns the low-order element of a 128-bit vector of [2 x double] as
/// a double-precision floating-point value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
/// A 128-bit vector of [2 x double]. The lower 64 bits are returned.
/// \returns A double-precision floating-point value copied from the lower 64
/// bits of \a __a.
static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a) {
  return __a[0];
}
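
/* Usage sketch (illustrative, not part of the original header): converts the
 * two low integers of a [4 x i32] vector to doubles with _mm_cvtepi32_pd,
 * then reads the low element back with _mm_cvtsd_f64. The name
 * __example_epi32_roundtrip is hypothetical. */
static __inline__ double __example_epi32_roundtrip(void) {
  __m128i __ints = __extension__(__m128i)(__v4si){7, -3, 0, 0};
  __m128d __dbls = _mm_cvtepi32_pd(__ints); /* {7.0, -3.0} */
  return _mm_cvtsd_f64(__dbls);             /* 7.0 */
}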

/// Loads a 128-bit floating-point vector of [2 x double] from an aligned
/// memory location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
///
/// \param __dp
/// A pointer to a 128-bit memory location. The address of the memory
/// location has to be 16-byte aligned.
/// \returns A 128-bit vector of [2 x double] containing the loaded values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp) {
  return *(const __m128d *)__dp;
}

/// Loads a double-precision floating-point value from a specified memory
/// location and duplicates it to both vector elements of a 128-bit vector of
/// [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction.
///
/// \param __dp
/// A pointer to a memory location containing a double-precision value.
/// \returns A 128-bit vector of [2 x double] containing the loaded and
/// duplicated values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp) {
  struct __mm_load1_pd_struct {
    double __u;
  } __attribute__((__packed__, __may_alias__));
  double __u = ((const struct __mm_load1_pd_struct *)__dp)->__u;
  return __extension__(__m128d){__u, __u};
}

#define _mm_load_pd1(dp) _mm_load1_pd(dp)

/// Loads two double-precision values, in reverse order, from an aligned
/// memory location into a 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +
/// needed shuffling instructions. In AVX mode, the shuffling may be combined
/// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction.
///
/// \param __dp
/// A 16-byte aligned pointer to an array of double-precision values to be
/// loaded in reverse order.
/// \returns A 128-bit vector of [2 x double] containing the reversed loaded
/// values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp) {
  __m128d __u = *(const __m128d *)__dp;
  return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);
}

/// Loads a 128-bit floating-point vector of [2 x double] from an
/// unaligned memory location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.
///
/// \param __dp
/// A pointer to a 128-bit memory location. The address of the memory
/// location does not have to be aligned.
/// \returns A 128-bit vector of [2 x double] containing the loaded values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp) {
  struct __loadu_pd {
    __m128d_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_pd *)__dp)->__v;
}
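
/* Usage sketch (illustrative, not part of the original header): _mm_load_pd
 * requires a 16-byte-aligned address, while _mm_loadu_pd accepts any
 * address, so the latter can read from an offset into an aligned buffer.
 * The name __example_load_aligned_vs_unaligned is hypothetical. */
static __inline__ __m128d __example_load_aligned_vs_unaligned(void) {
  double __buf[3] __attribute__((__aligned__(16))) = {1.0, 2.0, 3.0};
  __m128d __lo = _mm_load_pd(__buf);      /* aligned load of {1.0, 2.0} */
  __m128d __hi = _mm_loadu_pd(__buf + 1); /* unaligned load of {2.0, 3.0} */
  return __extension__(__m128d){__lo[0], __hi[1]}; /* {1.0, 3.0} */
}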

/// Loads a 64-bit integer value to the low element of a 128-bit integer
/// vector and clears the upper element.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
///
/// \param __a
/// A pointer to a 64-bit memory location. The address of the memory
/// location does not have to be aligned.
/// \returns A 128-bit vector of [2 x i64] containing the loaded value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a) {
  struct __loadu_si64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  long long __u = ((const struct __loadu_si64 *)__a)->__v;
  return __extension__(__m128i)(__v2di){__u, 0LL};
}

/// Loads a 32-bit integer value to the low element of a 128-bit integer
/// vector and clears the upper elements.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
///
/// \param __a
/// A pointer to a 32-bit memory location. The address of the memory
/// location does not have to be aligned.
/// \returns A 128-bit vector of [4 x i32] containing the loaded value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si32(void const *__a) {
  struct __loadu_si32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  int __u = ((const struct __loadu_si32 *)__a)->__v;
  return __extension__(__m128i)(__v4si){__u, 0, 0, 0};
}

/// Loads a 16-bit integer value to the low element of a 128-bit integer
/// vector and clears the upper elements.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic does not correspond to a specific instruction.
///
/// \param __a
/// A pointer to a 16-bit memory location. The address of the memory
/// location does not have to be aligned.
/// \returns A 128-bit vector of [8 x i16] containing the loaded value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si16(void const *__a) {
  struct __loadu_si16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  short __u = ((const struct __loadu_si16 *)__a)->__v;
  return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
}

/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit vector of [2 x double] and clears the upper element.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
///
/// \param __dp
/// A pointer to a memory location containing a double-precision value.
/// The address of the memory location does not have to be aligned.
/// \returns A 128-bit vector of [2 x double] containing the loaded value.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp) {
  struct __mm_load_sd_struct {
    double __u;
  } __attribute__((__packed__, __may_alias__));
  double __u = ((const struct __mm_load_sd_struct *)__dp)->__u;
  return __extension__(__m128d){__u, 0};
}

/// Loads a double-precision value into the high-order bits of a 128-bit
/// vector of [2 x double]. The low-order bits are copied from the low-order
/// bits of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
///
/// \param __a
/// A 128-bit vector of [2 x double]. \n
/// Bits [63:0] are written to bits [63:0] of the result.
/// \param __dp
/// A pointer to a 64-bit memory location containing a double-precision
/// floating-point value that is loaded. The loaded value is written to bits
/// [127:64] of the result. The address of the memory location does not have
/// to be aligned.
/// \returns A 128-bit vector of [2 x double] containing the moved values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a,
                                                          double const *__dp) {
  struct __mm_loadh_pd_struct {
    double __u;
  } __attribute__((__packed__, __may_alias__));
  double __u = ((const struct __mm_loadh_pd_struct *)__dp)->__u;
  return __extension__(__m128d){__a[0], __u};
}

/// Loads a double-precision value into the low-order bits of a 128-bit
/// vector of [2 x double]. The high-order bits are copied from the
/// high-order bits of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
///
/// \param __a
/// A 128-bit vector of [2 x double]. \n
/// Bits [127:64] are written to bits [127:64] of the result.
/// \param __dp
/// A pointer to a 64-bit memory location containing a double-precision
/// floating-point value that is loaded. The loaded value is written to bits
/// [63:0] of the result. The address of the memory location does not have to
/// be aligned.
/// \returns A 128-bit vector of [2 x double] containing the moved values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a,
                                                          double const *__dp) {
  struct __mm_loadl_pd_struct {
    double __u;
  } __attribute__((__packed__, __may_alias__));
  double __u = ((const struct __mm_loadl_pd_struct *)__dp)->__u;
  return __extension__(__m128d){__u, __a[1]};
}
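
/* Usage sketch (illustrative, not part of the original header): assembles a
 * vector from two separate doubles in memory using _mm_loadl_pd and
 * _mm_loadh_pd. The name __example_gather_two_doubles is hypothetical. */
static __inline__ __m128d __example_gather_two_doubles(double const *__lo,
                                                       double const *__hi) {
  __m128d __v = __extension__(__m128d){0.0, 0.0};
  __v = _mm_loadl_pd(__v, __lo); /* bits [63:0] <- *__lo */
  __v = _mm_loadh_pd(__v, __hi); /* bits [127:64] <- *__hi */
  return __v;
}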

/// Constructs a 128-bit floating-point vector of [2 x double] with
/// unspecified content. This could be used as an argument to another
/// intrinsic function where the argument is required but the value is not
/// actually used.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \returns A 128-bit floating-point vector of [2 x double] with unspecified
/// content.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void) {
  return (__m128d)__builtin_ia32_undef128();
}

/// Constructs a 128-bit floating-point vector of [2 x double]. The lower
/// 64 bits of the vector are initialized with the specified double-precision
/// floating-point value. The upper 64 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
///
/// \param __w
/// A double-precision floating-point value used to initialize the lower 64
/// bits of the result.
/// \returns An initialized 128-bit floating-point vector of [2 x double]. The
/// lower 64 bits contain the value of the parameter. The upper 64 bits are
/// set to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) {
  return __extension__(__m128d){__w, 0.0};
}

/// Constructs a 128-bit floating-point vector of [2 x double], with each
/// of the two double-precision floating-point vector elements set to the
/// specified double-precision floating-point value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.
///
/// \param __w
/// A double-precision floating-point value used to initialize each vector
/// element of the result.
/// \returns An initialized 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w) {
  return __extension__(__m128d){__w, __w};
}

/// Constructs a 128-bit floating-point vector of [2 x double], with each
/// of the two double-precision floating-point vector elements set to the
/// specified double-precision floating-point value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.
///
/// \param __w
/// A double-precision floating-point value used to initialize each vector
/// element of the result.
/// \returns An initialized 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w) {
  return _mm_set1_pd(__w);
}

/// Constructs a 128-bit floating-point vector of [2 x double]
/// initialized with the specified double-precision floating-point values.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
///
/// \param __w
/// A double-precision floating-point value used to initialize the upper 64
/// bits of the result.
/// \param __x
/// A double-precision floating-point value used to initialize the lower 64
/// bits of the result.
/// \returns An initialized 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w,
                                                        double __x) {
  return __extension__(__m128d){__x, __w};
}

/// Constructs a 128-bit floating-point vector of [2 x double],
/// initialized in reverse order with the specified double-precision
/// floating-point values.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
///
/// \param __w
/// A double-precision floating-point value used to initialize the lower 64
/// bits of the result.
/// \param __x
/// A double-precision floating-point value used to initialize the upper 64
/// bits of the result.
/// \returns An initialized 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w,
                                                         double __x) {
  return __extension__(__m128d){__w, __x};
}
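
/* Usage sketch (illustrative, not part of the original header): _mm_set_pd
 * takes its arguments in high-to-low order, while _mm_setr_pd takes them in
 * memory (low-to-high) order, so the two calls below build the same vector.
 * The name __example_set_vs_setr is hypothetical. */
static __inline__ int __example_set_vs_setr(void) {
  __m128d __a = _mm_set_pd(2.0, 1.0);  /* element 0 = 1.0, element 1 = 2.0 */
  __m128d __b = _mm_setr_pd(1.0, 2.0); /* element 0 = 1.0, element 1 = 2.0 */
  return __a[0] == __b[0] && __a[1] == __b[1]; /* 1 */
}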

/// Constructs a 128-bit floating-point vector of [2 x double]
/// initialized to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
///
/// \returns An initialized 128-bit floating-point vector of [2 x double] with
/// all elements set to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) {
  return __extension__(__m128d){0.0, 0.0};
}

/// Constructs a 128-bit floating-point vector of [2 x double]. The lower
/// 64 bits are set to the lower 64 bits of the second parameter. The upper
/// 64 bits are set to the upper 64 bits of the first parameter.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.
///
/// \param __a
/// A 128-bit vector of [2 x double]. The upper 64 bits are written to the
/// upper 64 bits of the result.
/// \param __b
/// A 128-bit vector of [2 x double]. The lower 64 bits are written to the
/// lower 64 bits of the result.
/// \returns A 128-bit vector of [2 x double] containing the moved values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a,
                                                         __m128d __b) {
  __a[0] = __b[0];
  return __a;
}

/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
/// memory location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
///
/// \param __dp
/// A pointer to a 64-bit memory location.
/// \param __a
/// A 128-bit vector of [2 x double] containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp,
                                                       __m128d __a) {
  struct __mm_store_sd_struct {
    double __u;
  } __attribute__((__packed__, __may_alias__));
  ((struct __mm_store_sd_struct *)__dp)->__u = __a[0];
}

/// Moves packed double-precision values from a 128-bit vector of
/// [2 x double] to a memory location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
///
/// \param __dp
/// A pointer to an aligned memory location that can store two
/// double-precision values.
/// \param __a
/// A packed 128-bit vector of [2 x double] containing the values to be
/// moved.
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp,
                                                       __m128d __a) {
  *(__m128d *)__dp = __a;
}

/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
/// the upper and lower 64 bits of a memory location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the
/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
///
/// \param __dp
/// A pointer to a memory location that can store two double-precision
/// values.
/// \param __a
/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
/// of the values in \a __dp.
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp,
                                                        __m128d __a) {
  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
  _mm_store_pd(__dp, __a);
}

/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
/// the upper and lower 64 bits of a memory location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the
/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
///
/// \param __dp
/// A pointer to a memory location that can store two double-precision
/// values.
/// \param __a
/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
/// of the values in \a __dp.
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp,
                                                        __m128d __a) {
  _mm_store1_pd(__dp, __a);
}

/// Stores a 128-bit vector of [2 x double] into an unaligned memory
/// location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.
///
/// \param __dp
/// A pointer to a 128-bit memory location. The address of the memory
/// location does not have to be aligned.
/// \param __a
/// A 128-bit vector of [2 x double] containing the values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp,
                                                        __m128d __a) {
  struct __storeu_pd {
    __m128d_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pd *)__dp)->__v = __a;
}

/// Stores two double-precision values, in reverse order, from a 128-bit
/// vector of [2 x double] to a 16-byte aligned memory location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to a shuffling instruction followed by a
/// <c> VMOVAPD / MOVAPD </c> instruction.
///
/// \param __dp
/// A pointer to a 16-byte aligned memory location that can store two
/// double-precision values.
/// \param __a
/// A 128-bit vector of [2 x double] containing the values to be reversed and
/// stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp,
                                                        __m128d __a) {
  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);
  *(__m128d *)__dp = __a;
}

/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a
/// memory location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
///
/// \param __dp
/// A pointer to a 64-bit memory location.
/// \param __a
/// A 128-bit vector of [2 x double] containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp,
                                                        __m128d __a) {
  struct __mm_storeh_pd_struct {
    double __u;
  } __attribute__((__packed__, __may_alias__));
  ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[1];
}

/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
/// memory location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
///
/// \param __dp
/// A pointer to a 64-bit memory location.
/// \param __a
/// A 128-bit vector of [2 x double] containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp,
                                                        __m128d __a) {
  struct __mm_storeh_pd_struct {
    double __u;
  } __attribute__((__packed__, __may_alias__));
  ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[0];
}
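
/* Usage sketch (illustrative, not part of the original header): splits a
 * vector into two scalars with _mm_storel_pd and _mm_storeh_pd. The name
 * __example_split_vector is hypothetical. */
static __inline__ void __example_split_vector(double *__lo, double *__hi) {
  __m128d __v = _mm_set_pd(4.0, 3.0); /* {3.0, 4.0} */
  _mm_storel_pd(__lo, __v);           /* *__lo = 3.0 */
  _mm_storeh_pd(__hi, __v);           /* *__hi = 4.0 */
}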

/// Adds the corresponding elements of two 128-bit vectors of [16 x i8],
/// saving the lower 8 bits of each sum in the corresponding element of a
/// 128-bit result vector of [16 x i8].
///
/// The integer elements of both parameters can be either signed or unsigned.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction.
///
/// \param __a
/// A 128-bit vector of [16 x i8].
/// \param __b
/// A 128-bit vector of [16 x i8].
/// \returns A 128-bit vector of [16 x i8] containing the sums of both
/// parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
                                                          __m128i __b) {
  return (__m128i)((__v16qu)__a + (__v16qu)__b);
}

/// Adds the corresponding elements of two 128-bit vectors of [8 x i16],
/// saving the lower 16 bits of each sum in the corresponding element of a
/// 128-bit result vector of [8 x i16].
///
/// The integer elements of both parameters can be either signed or unsigned.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction.
///
/// \param __a
/// A 128-bit vector of [8 x i16].
/// \param __b
/// A 128-bit vector of [8 x i16].
/// \returns A 128-bit vector of [8 x i16] containing the sums of both
/// parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
                                                           __m128i __b) {
  return (__m128i)((__v8hu)__a + (__v8hu)__b);
}

/// Adds the corresponding elements of two 128-bit vectors of [4 x i32],
/// saving the lower 32 bits of each sum in the corresponding element of a
/// 128-bit result vector of [4 x i32].
///
/// The integer elements of both parameters can be either signed or unsigned.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction.
///
/// \param __a
/// A 128-bit vector of [4 x i32].
/// \param __b
/// A 128-bit vector of [4 x i32].
/// \returns A 128-bit vector of [4 x i32] containing the sums of both
/// parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a,
                                                           __m128i __b) {
  return (__m128i)((__v4su)__a + (__v4su)__b);
}

/// Adds two signed or unsigned 64-bit integer values, returning the
/// lower 64 bits of the sum.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PADDQ </c> instruction.
///
/// \param __a
/// A 64-bit integer.
/// \param __b
/// A 64-bit integer.
/// \returns A 64-bit integer containing the sum of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a,
                                                            __m64 __b) {
  return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
}

/// Adds the corresponding elements of two 128-bit vectors of [2 x i64],
/// saving the lower 64 bits of each sum in the corresponding element of a
/// 128-bit result vector of [2 x i64].
///
/// The integer elements of both parameters can be either signed or unsigned.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction.
///
/// \param __a
/// A 128-bit vector of [2 x i64].
/// \param __b
/// A 128-bit vector of [2 x i64].
/// \returns A 128-bit vector of [2 x i64] containing the sums of both
/// parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a,
                                                           __m128i __b) {
  return (__m128i)((__v2du)__a + (__v2du)__b);
}

/// Adds, with saturation, the corresponding elements of two 128-bit
/// signed [16 x i8] vectors, saving each sum in the corresponding element
/// of a 128-bit result vector of [16 x i8].
///
/// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
/// less than 0x80 are saturated to 0x80.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction.
///
/// \param __a
/// A 128-bit signed [16 x i8] vector.
/// \param __b
/// A 128-bit signed [16 x i8] vector.
/// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of
/// both parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a,
                                                           __m128i __b) {
  return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b);
}

/// Adds, with saturation, the corresponding elements of two 128-bit
/// signed [8 x i16] vectors, saving each sum in the corresponding element
/// of a 128-bit result vector of [8 x i16].
///
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
/// less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction.
///
/// \param __a
/// A 128-bit signed [8 x i16] vector.
/// \param __b
/// A 128-bit signed [8 x i16] vector.
/// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of
/// both parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a,
                                                            __m128i __b) {
  return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b);
}

/// Adds, with saturation, the corresponding elements of two 128-bit
/// unsigned [16 x i8] vectors, saving each sum in the corresponding element
/// of a 128-bit result vector of [16 x i8].
///
/// Positive sums greater than 0xFF are saturated to 0xFF. Negative sums are
/// saturated to 0x00.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.
///
/// \param __a
/// A 128-bit unsigned [16 x i8] vector.
/// \param __b
/// A 128-bit unsigned [16 x i8] vector.
/// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums
/// of both parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a,
                                                           __m128i __b) {
  return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b);
}

/// Adds, with saturation, the corresponding elements of two 128-bit
/// unsigned [8 x i16] vectors, saving each sum in the corresponding element
/// of a 128-bit result vector of [8 x i16].
///
/// Positive sums greater than 0xFFFF are saturated to 0xFFFF. Negative sums
/// are saturated to 0x0000.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPADDUSW / PADDUSW </c> instruction.
///
/// \param __a
/// A 128-bit unsigned [8 x i16] vector.
/// \param __b
/// A 128-bit unsigned [8 x i16] vector.
/// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums
/// of both parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a,
                                                            __m128i __b) {
  return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b);
}
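
/* Usage sketch (illustrative, not part of the original header): contrasts the
 * wrapping add _mm_add_epi16 with the unsigned saturating add _mm_adds_epu16
 * on an element that would overflow 16 bits. The name
 * __example_wrap_vs_saturate is hypothetical. */
static __inline__ void __example_wrap_vs_saturate(unsigned short *__wrapped,
                                                  unsigned short *__saturated) {
  __m128i __a = __extension__(__m128i)(__v8hu){0xFFF0, 0, 0, 0, 0, 0, 0, 0};
  __m128i __b = __extension__(__m128i)(__v8hu){0x0020, 0, 0, 0, 0, 0, 0, 0};
  *__wrapped = ((__v8hu)_mm_add_epi16(__a, __b))[0];    /* 0x0010: wraps */
  *__saturated = ((__v8hu)_mm_adds_epu16(__a, __b))[0]; /* 0xFFFF: clamps */
}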

/// Computes the rounded averages of corresponding elements of two
/// 128-bit unsigned [16 x i8] vectors, saving each result in the
/// corresponding element of a 128-bit result vector of [16 x i8].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction.
///
/// \param __a
/// A 128-bit unsigned [16 x i8] vector.
/// \param __b
/// A 128-bit unsigned [16 x i8] vector.
/// \returns A 128-bit unsigned [16 x i8] vector containing the rounded
/// averages of both parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a,
                                                          __m128i __b) {
  return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
}

/// Computes the rounded averages of corresponding elements of two
/// 128-bit unsigned [8 x i16] vectors, saving each result in the
/// corresponding element of a 128-bit result vector of [8 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction.
///
/// \param __a
/// A 128-bit unsigned [8 x i16] vector.
/// \param __b
/// A 128-bit unsigned [8 x i16] vector.
/// \returns A 128-bit unsigned [8 x i16] vector containing the rounded
/// averages of both parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a,
                                                           __m128i __b) {
  return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
}

/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]
/// vectors, producing eight intermediate 32-bit signed integer products, and
/// adds the consecutive pairs of 32-bit products to form a 128-bit signed
/// [4 x i32] vector.
///
/// For example, bits [15:0] of both parameters are multiplied producing a
/// 32-bit product, bits [31:16] of both parameters are multiplied producing
/// a 32-bit product, and the sum of those two products becomes bits [31:0]
/// of the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction.
///
/// \param __a
/// A 128-bit signed [8 x i16] vector.
/// \param __b
/// A 128-bit signed [8 x i16] vector.
/// \returns A 128-bit signed [4 x i32] vector containing the sums of products
/// of both parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a,
                                                            __m128i __b) {
  return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
}
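
/* Usage sketch (illustrative, not part of the original header): uses
 * _mm_madd_epi16 to compute pairwise products-and-sums, the building block
 * of a fixed-point dot product. The name __example_madd_pairs is
 * hypothetical. */
static __inline__ int __example_madd_pairs(void) {
  __m128i __a = __extension__(__m128i)(__v8hi){1, 2, 3, 4, 0, 0, 0, 0};
  __m128i __b = __extension__(__m128i)(__v8hi){5, 6, 7, 8, 0, 0, 0, 0};
  __m128i __s = _mm_madd_epi16(__a, __b);
  /* element 0 = 1*5 + 2*6 = 17, element 1 = 3*7 + 4*8 = 53 */
  return ((__v4si)__s)[0] + ((__v4si)__s)[1]; /* 70 */
}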

/// Compares corresponding elements of two 128-bit signed [8 x i16]
/// vectors, saving the greater value from each comparison in the
/// corresponding element of a 128-bit result vector of [8 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction.
///
/// \param __a
/// A 128-bit signed [8 x i16] vector.
/// \param __b
/// A 128-bit signed [8 x i16] vector.
/// \returns A 128-bit signed [8 x i16] vector containing the greater value of
/// each comparison.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a,
                                                           __m128i __b) {
  return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b);
}

/// Compares corresponding elements of two 128-bit unsigned [16 x i8]
/// vectors, saving the greater value from each comparison in the
/// corresponding element of a 128-bit result vector of [16 x i8].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction.
///
/// \param __a
/// A 128-bit unsigned [16 x i8] vector.
/// \param __b
/// A 128-bit unsigned [16 x i8] vector.
/// \returns A 128-bit unsigned [16 x i8] vector containing the greater value
/// of each comparison.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a,
                                                          __m128i __b) {
  return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b);
}

/// Compares corresponding elements of two 128-bit signed [8 x i16]
/// vectors, saving the smaller value from each comparison in the
/// corresponding element of a 128-bit result vector of [8 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction.
///
/// \param __a
/// A 128-bit signed [8 x i16] vector.
/// \param __b
/// A 128-bit signed [8 x i16] vector.
/// \returns A 128-bit signed [8 x i16] vector containing the smaller value of
/// each comparison.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a,
                                                           __m128i __b) {
  return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b);
}

/// Compares corresponding elements of two 128-bit unsigned [16 x i8]
/// vectors, saving the smaller value from each comparison in the
/// corresponding element of a 128-bit result vector of [16 x i8].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction.
///
/// \param __a
/// A 128-bit unsigned [16 x i8] vector.
/// \param __b
/// A 128-bit unsigned [16 x i8] vector.
/// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value
/// of each comparison.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a,
                                                          __m128i __b) {
  return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b);
}
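
/* Usage sketch (illustrative, not part of the original header): clamps each
 * signed 16-bit element into [0, 255] by combining _mm_max_epi16 and
 * _mm_min_epi16. The name __example_clamp_epi16 is hypothetical. */
static __inline__ __m128i __example_clamp_epi16(__m128i __v) {
  __m128i __zero = __extension__(__m128i)(__v8hi){0, 0, 0, 0, 0, 0, 0, 0};
  __m128i __cap = __extension__(__m128i)(__v8hi){255, 255, 255, 255,
                                                 255, 255, 255, 255};
  return _mm_min_epi16(_mm_max_epi16(__v, __zero), __cap);
}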

/// Multiplies the corresponding elements of two signed [8 x i16]
/// vectors, saving the upper 16 bits of each 32-bit product in the
/// corresponding element of a 128-bit signed [8 x i16] result vector.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction.
///
/// \param __a
/// A 128-bit signed [8 x i16] vector.
/// \param __b
/// A 128-bit signed [8 x i16] vector.
/// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of
/// each of the eight 32-bit products.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a,
                                                             __m128i __b) {
  return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
}

/// Multiplies the corresponding elements of two unsigned [8 x i16]
/// vectors, saving the upper 16 bits of each 32-bit product in the
/// corresponding element of a 128-bit unsigned [8 x i16] result vector.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction.
///
/// \param __a
/// A 128-bit unsigned [8 x i16] vector.
/// \param __b
/// A 128-bit unsigned [8 x i16] vector.
/// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits
/// of each of the eight 32-bit products.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a,
                                                             __m128i __b) {
  return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
}

/// Multiplies the corresponding elements of two signed [8 x i16]
/// vectors, saving the lower 16 bits of each 32-bit product in the
/// corresponding element of a 128-bit signed [8 x i16] result vector.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction.
///
/// \param __a
/// A 128-bit signed [8 x i16] vector.
/// \param __b
/// A 128-bit signed [8 x i16] vector.
/// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of
/// each of the eight 32-bit products.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a,
                                                             __m128i __b) {
  return (__m128i)((__v8hu)__a * (__v8hu)__b);
}
2420 | ||||
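For context (not part of the header), the high and low halves produced by _mm_mulhi_epi16 and _mm_mullo_epi16 together give the full 32-bit products. A minimal sketch; the helper name and the sample values are illustrative only:

#include <emmintrin.h>
#include <stdint.h>

/* Sketch: rebuild the full 32-bit product of lane 0 from its two halves. */
static int32_t full_product_lane0(void)
{
    __m128i a  = _mm_set1_epi16(1000);
    __m128i b  = _mm_set1_epi16(-300);
    __m128i hi = _mm_mulhi_epi16(a, b);               /* upper 16 bits of each product */
    __m128i lo = _mm_mullo_epi16(a, b);               /* lower 16 bits of each product */
    uint32_t h = (uint16_t)_mm_extract_epi16(hi, 0);
    uint32_t l = (uint16_t)_mm_extract_epi16(lo, 0);
    return (int32_t)((h << 16) | l);                  /* 1000 * -300 == -300000 */
}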
2421 | /// Multiplies 32-bit unsigned integer values contained in the lower bits | |||
2422 | /// of the two 64-bit integer vectors and returns the 64-bit unsigned | |||
2423 | /// product. | |||
2424 | /// | |||
2425 | /// \headerfile <x86intrin.h> | |||
2426 | /// | |||
2427 | /// This intrinsic corresponds to the <c> PMULUDQ </c> instruction. | |||
2428 | /// | |||
2429 | /// \param __a | |||
2430 | /// A 64-bit integer containing one of the source operands. | |||
2431 | /// \param __b | |||
2432 | /// A 64-bit integer containing one of the source operands. | |||
2433 | /// \returns A 64-bit integer vector containing the product of both operands. | |||
2434 | static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, | |||
2435 | __m64 __b) { | |||
2436 | return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); | |||
2437 | } | |||
2438 | ||||
2439 | /// Multiplies 32-bit unsigned integer values contained in the lower | |||
2440 | /// bits of the corresponding elements of two [2 x i64] vectors, and returns | |||
2441 | /// the 64-bit products in the corresponding elements of a [2 x i64] vector. | |||
2442 | /// | |||
2443 | /// \headerfile <x86intrin.h> | |||
2444 | /// | |||
2445 | /// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction. | |||
2446 | /// | |||
2447 | /// \param __a | |||
2448 | /// A [2 x i64] vector containing one of the source operands. | |||
2449 | /// \param __b | |||
2450 | /// A [2 x i64] vector containing one of the source operands. | |||
2451 | /// \returns A [2 x i64] vector containing the product of both operands. | |||
2452 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, | |||
2453 | __m128i __b) { | |||
2454 | return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); | |||
2455 | } | |||
2456 | ||||
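As a usage note (illustrative, not from the header): _mm_mul_epu32 multiplies only the low 32-bit element of each 64-bit lane, i.e. elements 0 and 2 of a [4 x i32] view of the operands. A hedged sketch with made-up values:

#include <emmintrin.h>
#include <stdint.h>

/* Sketch: widening multiply of elements 0 and 2; elements 1 and 3 are ignored. */
static void mul_even_elements(uint64_t out[2])
{
    __m128i a = _mm_set_epi32(9, 0x10000, 7, 3);   /* arguments are elements 3..0 */
    __m128i b = _mm_set_epi32(9, 0x10000, 7, 5);
    __m128i p = _mm_mul_epu32(a, b);
    _mm_storeu_si128((__m128i *)out, p);           /* out[0] == 15, out[1] == 0x100000000 */
}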
2457 | /// Computes the absolute differences of corresponding 8-bit integer | |||
2458 | /// values in two 128-bit vectors. Sums the first 8 absolute differences, and | |||
2459 | /// separately sums the second 8 absolute differences. Packs these two | |||
2460 | /// unsigned 16-bit integer sums into the upper and lower elements of a | |||
2461 | /// [2 x i64] vector. | |||
2462 | /// | |||
2463 | /// \headerfile <x86intrin.h> | |||
2464 | /// | |||
2465 | /// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction. | |||
2466 | /// | |||
2467 | /// \param __a | |||
2468 | /// A 128-bit integer vector containing one of the source operands. | |||
2469 | /// \param __b | |||
2470 | /// A 128-bit integer vector containing one of the source operands. | |||
2471 | /// \returns A [2 x i64] vector containing the sums of the sets of absolute | |||
2472 | /// differences between both operands. | |||
2473 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, | |||
2474 | __m128i __b) { | |||
2475 | return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); | |||
2476 | } | |||
2477 | ||||
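A common use of PSADBW (shown here only as an illustrative sketch; the helper name is not part of the header) is to sum all 16 bytes of a vector by taking the absolute differences against zero:

#include <emmintrin.h>
#include <stdint.h>

/* Sketch: horizontal sum of 16 unsigned bytes via _mm_sad_epu8 against zero. */
static uint32_t sum_bytes(__m128i v)
{
    __m128i sad = _mm_sad_epu8(v, _mm_setzero_si128());  /* partial sums in bits [15:0] and [79:64] */
    return (uint32_t)_mm_cvtsi128_si32(sad) +
           (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
}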
2478 | /// Subtracts the corresponding 8-bit integer values in the operands. | |||
2479 | /// | |||
2480 | /// \headerfile <x86intrin.h> | |||
2481 | /// | |||
2482 | /// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction. | |||
2483 | /// | |||
2484 | /// \param __a | |||
2485 | /// A 128-bit integer vector containing the minuends. | |||
2486 | /// \param __b | |||
2487 | /// A 128-bit integer vector containing the subtrahends. | |||
2488 | /// \returns A 128-bit integer vector containing the differences of the values | |||
2489 | /// in the operands. | |||
2490 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, | |||
2491 | __m128i __b) { | |||
2492 | return (__m128i)((__v16qu)__a - (__v16qu)__b); | |||
2493 | } | |||
2494 | ||||
2495 | /// Subtracts the corresponding 16-bit integer values in the operands. | |||
2496 | /// | |||
2497 | /// \headerfile <x86intrin.h> | |||
2498 | /// | |||
2499 | /// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction. | |||
2500 | /// | |||
2501 | /// \param __a | |||
2502 | /// A 128-bit integer vector containing the minuends. | |||
2503 | /// \param __b | |||
2504 | /// A 128-bit integer vector containing the subtrahends. | |||
2505 | /// \returns A 128-bit integer vector containing the differences of the values | |||
2506 | /// in the operands. | |||
2507 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, | |||
2508 | __m128i __b) { | |||
2509 | return (__m128i)((__v8hu)__a - (__v8hu)__b); | |||
2510 | } | |||
2511 | ||||
2512 | /// Subtracts the corresponding 32-bit integer values in the operands. | |||
2513 | /// | |||
2514 | /// \headerfile <x86intrin.h> | |||
2515 | /// | |||
2516 | /// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction. | |||
2517 | /// | |||
2518 | /// \param __a | |||
2519 | /// A 128-bit integer vector containing the minuends. | |||
2520 | /// \param __b | |||
2521 | /// A 128-bit integer vector containing the subtrahends. | |||
2522 | /// \returns A 128-bit integer vector containing the differences of the values | |||
2523 | /// in the operands. | |||
2524 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, | |||
2525 | __m128i __b) { | |||
2526 | return (__m128i)((__v4su)__a - (__v4su)__b); | |||
2527 | } | |||
2528 | ||||
2529 | /// Subtracts signed or unsigned 64-bit integer values and writes the | |||
2530 | /// difference to the corresponding bits in the destination. | |||
2531 | /// | |||
2532 | /// \headerfile <x86intrin.h> | |||
2533 | /// | |||
2534 | /// This intrinsic corresponds to the <c> PSUBQ </c> instruction. | |||
2535 | /// | |||
2536 | /// \param __a | |||
2537 | /// A 64-bit integer vector containing the minuend. | |||
2538 | /// \param __b | |||
2539 | /// A 64-bit integer vector containing the subtrahend. | |||
2540 | /// \returns A 64-bit integer vector containing the difference of the values in | |||
2541 | /// the operands. | |||
2542 | static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, | |||
2543 | __m64 __b) { | |||
2544 | return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); | |||
2545 | } | |||
2546 | ||||
2547 | /// Subtracts the corresponding elements of two [2 x i64] vectors. | |||
2548 | /// | |||
2549 | /// \headerfile <x86intrin.h> | |||
2550 | /// | |||
2551 | /// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction. | |||
2552 | /// | |||
2553 | /// \param __a | |||
2554 | /// A 128-bit integer vector containing the minuends. | |||
2555 | /// \param __b | |||
2556 | /// A 128-bit integer vector containing the subtrahends. | |||
2557 | /// \returns A 128-bit integer vector containing the differences of the values | |||
2558 | /// in the operands. | |||
2559 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, | |||
2560 | __m128i __b) { | |||
2561 | return (__m128i)((__v2du)__a - (__v2du)__b); | |||
2562 | } | |||
2563 | ||||
2564 | /// Subtracts, with saturation, corresponding 8-bit signed integer values in | |||
2565 | /// the input and returns the differences in the corresponding bytes in the | |||
2566 | /// destination. | |||
2567 | /// | |||
2568 | /// Differences greater than 0x7F are saturated to 0x7F, and differences | |||
2569 | /// less than 0x80 are saturated to 0x80. | |||
2570 | /// | |||
2571 | /// \headerfile <x86intrin.h> | |||
2572 | /// | |||
2573 | /// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction. | |||
2574 | /// | |||
2575 | /// \param __a | |||
2576 | /// A 128-bit integer vector containing the minuends. | |||
2577 | /// \param __b | |||
2578 | /// A 128-bit integer vector containing the subtrahends. | |||
2579 | /// \returns A 128-bit integer vector containing the differences of the values | |||
2580 | /// in the operands. | |||
2581 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, | |||
2582 | __m128i __b) { | |||
2583 | return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b); | |||
2584 | } | |||
2585 | ||||
2586 | /// Subtracts, with saturation, corresponding 16-bit signed integer values in | |||
2587 | /// the input and returns the differences in the corresponding bytes in the | |||
2588 | /// destination. | |||
2589 | /// | |||
2590 | /// Differences greater than 0x7FFF are saturated to 0x7FFF, and differences | |||
2591 | /// than 0x8000 are saturated to 0x8000. | |||
2592 | /// | |||
2593 | /// \headerfile <x86intrin.h> | |||
2594 | /// | |||
2595 | /// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction. | |||
2596 | /// | |||
2597 | /// \param __a | |||
2598 | /// A 128-bit integer vector containing the minuends. | |||
2599 | /// \param __b | |||
2600 | /// A 128-bit integer vector containing the subtrahends. | |||
2601 | /// \returns A 128-bit integer vector containing the differences of the values | |||
2602 | /// in the operands. | |||
2603 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, | |||
2604 | __m128i __b) { | |||
2605 | return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b); | |||
2606 | } | |||
2607 | ||||
2608 | /// Subtracts, with saturation, corresponding 8-bit unsigned integer values in | |||
2609 | /// the input and returns the differences in the corresponding bytes in the | |||
2610 | /// destination. | |||
2611 | /// | |||
2612 | /// Differences less than 0x00 are saturated to 0x00. | |||
2613 | /// | |||
2614 | /// \headerfile <x86intrin.h> | |||
2615 | /// | |||
2616 | /// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction. | |||
2617 | /// | |||
2618 | /// \param __a | |||
2619 | /// A 128-bit integer vector containing the minuends. | |||
2620 | /// \param __b | |||
2621 | /// A 128-bit integer vector containing the subtrahends. | |||
2622 | /// \returns A 128-bit integer vector containing the unsigned integer | |||
2623 | /// differences of the values in the operands. | |||
2624 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, | |||
2625 | __m128i __b) { | |||
2626 | return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b); | |||
2627 | } | |||
2628 | ||||
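Unsigned saturating subtraction clamps at zero, which makes it a convenient building block for a per-byte absolute difference; a minimal sketch (helper name is illustrative only):

#include <emmintrin.h>

/* Sketch: |a - b| per unsigned byte using two saturating subtractions and an OR. */
static __m128i absdiff_epu8(__m128i a, __m128i b)
{
    return _mm_or_si128(_mm_subs_epu8(a, b),   /* max(a - b, 0) */
                        _mm_subs_epu8(b, a));  /* max(b - a, 0) */
}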
2629 | /// Subtracts, with saturation, corresponding 16-bit unsigned integer values in | |||
2630 | /// the input and returns the differences in the corresponding bytes in the | |||
2631 | /// destination. | |||
2632 | /// | |||
2633 | /// Differences less than 0x0000 are saturated to 0x0000. | |||
2634 | /// | |||
2635 | /// \headerfile <x86intrin.h> | |||
2636 | /// | |||
2637 | /// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction. | |||
2638 | /// | |||
2639 | /// \param __a | |||
2640 | /// A 128-bit integer vector containing the minuends. | |||
2641 | /// \param __b | |||
2642 | /// A 128-bit integer vector containing the subtrahends. | |||
2643 | /// \returns A 128-bit integer vector containing the unsigned integer | |||
2644 | /// differences of the values in the operands. | |||
2645 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, | |||
2646 | __m128i __b) { | |||
2647 | return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b); | |||
2648 | } | |||
2649 | ||||
2650 | /// Performs a bitwise AND of two 128-bit integer vectors. | |||
2651 | /// | |||
2652 | /// \headerfile <x86intrin.h> | |||
2653 | /// | |||
2654 | /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. | |||
2655 | /// | |||
2656 | /// \param __a | |||
2657 | /// A 128-bit integer vector containing one of the source operands. | |||
2658 | /// \param __b | |||
2659 | /// A 128-bit integer vector containing one of the source operands. | |||
2660 | /// \returns A 128-bit integer vector containing the bitwise AND of the values | |||
2661 | /// in both operands. | |||
2662 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, | |||
2663 | __m128i __b) { | |||
2664 | return (__m128i)((__v2du)__a & (__v2du)__b); | |||
2665 | } | |||
2666 | ||||
2667 | /// Performs a bitwise AND of two 128-bit integer vectors, using the | |||
2668 | /// one's complement of the values contained in the first source operand. | |||
2669 | /// | |||
2670 | /// \headerfile <x86intrin.h> | |||
2671 | /// | |||
2672 | /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. | |||
2673 | /// | |||
2674 | /// \param __a | |||
2675 | /// A 128-bit vector containing the left source operand. The one's complement | |||
2676 | /// of this value is used in the bitwise AND. | |||
2677 | /// \param __b | |||
2678 | /// A 128-bit vector containing the right source operand. | |||
2679 | /// \returns A 128-bit integer vector containing the bitwise AND of the one's | |||
2680 | /// complement of the first operand and the values in the second operand. | |||
2681 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, | |||
2682 | __m128i __b) { | |||
2683 | return (__m128i)(~(__v2du)__a & (__v2du)__b); | |||
2684 | } | |||
2685 | /// Performs a bitwise OR of two 128-bit integer vectors. | |||
2686 | /// | |||
2687 | /// \headerfile <x86intrin.h> | |||
2688 | /// | |||
2689 | /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. | |||
2690 | /// | |||
2691 | /// \param __a | |||
2692 | /// A 128-bit integer vector containing one of the source operands. | |||
2693 | /// \param __b | |||
2694 | /// A 128-bit integer vector containing one of the source operands. | |||
2695 | /// \returns A 128-bit integer vector containing the bitwise OR of the values | |||
2696 | /// in both operands. | |||
2697 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, | |||
2698 | __m128i __b) { | |||
2699 | return (__m128i)((__v2du)__a | (__v2du)__b); | |||
2700 | } | |||
2701 | ||||
2702 | /// Performs a bitwise exclusive OR of two 128-bit integer vectors. | |||
2703 | /// | |||
2704 | /// \headerfile <x86intrin.h> | |||
2705 | /// | |||
2706 | /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. | |||
2707 | /// | |||
2708 | /// \param __a | |||
2709 | /// A 128-bit integer vector containing one of the source operands. | |||
2710 | /// \param __b | |||
2711 | /// A 128-bit integer vector containing one of the source operands. | |||
2712 | /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the | |||
2713 | /// values in both operands. | |||
2714 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, | |||
2715 | __m128i __b) { | |||
2716 | return (__m128i)((__v2du)__a ^ (__v2du)__b); | |||
2717 | } | |||
2718 | ||||
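The four logical operations above are frequently combined into a branchless bit-select, where each result bit is taken from one operand or the other according to the mask; a minimal sketch with an illustrative helper name:

#include <emmintrin.h>

/* Sketch: per-bit select, r = (mask & a) | (~mask & b). */
static __m128i select_bits(__m128i mask, __m128i a, __m128i b)
{
    return _mm_or_si128(_mm_and_si128(mask, a),
                        _mm_andnot_si128(mask, b));
}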
2719 | /// Left-shifts the 128-bit integer vector operand by the specified | |||
2720 | /// number of bytes. Low-order bits are cleared. | |||
2721 | /// | |||
2722 | /// \headerfile <x86intrin.h> | |||
2723 | /// | |||
2724 | /// \code | |||
2725 | /// __m128i _mm_slli_si128(__m128i a, const int imm); | |||
2726 | /// \endcode | |||
2727 | /// | |||
2728 | /// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction. | |||
2729 | /// | |||
2730 | /// \param a | |||
2731 | /// A 128-bit integer vector containing the source operand. | |||
2732 | /// \param imm | |||
2733 | /// An immediate value specifying the number of bytes to left-shift operand | |||
2734 | /// \a a. | |||
2735 | /// \returns A 128-bit integer vector containing the left-shifted value. | |||
2736 | #define _mm_slli_si128(a, imm) \ | |||
2737 | ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \ | |||
2738 | (int)(imm))) | |||
2739 | ||||
2740 | #define _mm_bslli_si128(a, imm) \ | |||
2741 | ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \ | |||
2742 | (int)(imm))) | |||
2743 | ||||
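Note that the count for these byte-shift macros is a byte count, not a bit count, and must be an integer constant expression. A small illustrative use (the helper name is not part of the header):

#include <emmintrin.h>

/* Sketch: shift the whole 128-bit value left by 4 bytes (one 32-bit lane);
 * 32-bit element i of the result is element i-1 of v, and element 0 becomes 0. */
static __m128i shift_in_one_lane(__m128i v)
{
    return _mm_slli_si128(v, 4);
}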
2744 | /// Left-shifts each 16-bit value in the 128-bit integer vector operand | |||
2745 | /// by the specified number of bits. Low-order bits are cleared. | |||
2746 | /// | |||
2747 | /// \headerfile <x86intrin.h> | |||
2748 | /// | |||
2749 | /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. | |||
2750 | /// | |||
2751 | /// \param __a | |||
2752 | /// A 128-bit integer vector containing the source operand. | |||
2753 | /// \param __count | |||
2754 | /// An integer value specifying the number of bits to left-shift each value | |||
2755 | /// in operand \a __a. | |||
2756 | /// \returns A 128-bit integer vector containing the left-shifted values. | |||
2757 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, | |||
2758 | int __count) { | |||
2759 | return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); | |||
2760 | } | |||
2761 | ||||
2762 | /// Left-shifts each 16-bit value in the 128-bit integer vector operand | |||
2763 | /// by the specified number of bits. Low-order bits are cleared. | |||
2764 | /// | |||
2765 | /// \headerfile <x86intrin.h> | |||
2766 | /// | |||
2767 | /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. | |||
2768 | /// | |||
2769 | /// \param __a | |||
2770 | /// A 128-bit integer vector containing the source operand. | |||
2771 | /// \param __count | |||
2772 | /// A 128-bit integer vector in which bits [63:0] specify the number of bits | |||
2773 | /// to left-shift each value in operand \a __a. | |||
2774 | /// \returns A 128-bit integer vector containing the left-shifted values. | |||
2775 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, | |||
2776 | __m128i __count) { | |||
2777 | return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); | |||
2778 | } | |||
2779 | ||||
2780 | /// Left-shifts each 32-bit value in the 128-bit integer vector operand | |||
2781 | /// by the specified number of bits. Low-order bits are cleared. | |||
2782 | /// | |||
2783 | /// \headerfile <x86intrin.h> | |||
2784 | /// | |||
2785 | /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. | |||
2786 | /// | |||
2787 | /// \param __a | |||
2788 | /// A 128-bit integer vector containing the source operand. | |||
2789 | /// \param __count | |||
2790 | /// An integer value specifying the number of bits to left-shift each value | |||
2791 | /// in operand \a __a. | |||
2792 | /// \returns A 128-bit integer vector containing the left-shifted values. | |||
2793 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, | |||
2794 | int __count) { | |||
2795 | return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); | |||
2796 | } | |||
2797 | ||||
2798 | /// Left-shifts each 32-bit value in the 128-bit integer vector operand | |||
2799 | /// by the specified number of bits. Low-order bits are cleared. | |||
2800 | /// | |||
2801 | /// \headerfile <x86intrin.h> | |||
2802 | /// | |||
2803 | /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. | |||
2804 | /// | |||
2805 | /// \param __a | |||
2806 | /// A 128-bit integer vector containing the source operand. | |||
2807 | /// \param __count | |||
2808 | /// A 128-bit integer vector in which bits [63:0] specify the number of bits | |||
2809 | /// to left-shift each value in operand \a __a. | |||
2810 | /// \returns A 128-bit integer vector containing the left-shifted values. | |||
2811 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, | |||
2812 | __m128i __count) { | |||
2813 | return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); | |||
2814 | } | |||
2815 | ||||
2816 | /// Left-shifts each 64-bit value in the 128-bit integer vector operand | |||
2817 | /// by the specified number of bits. Low-order bits are cleared. | |||
2818 | /// | |||
2819 | /// \headerfile <x86intrin.h> | |||
2820 | /// | |||
2821 | /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. | |||
2822 | /// | |||
2823 | /// \param __a | |||
2824 | /// A 128-bit integer vector containing the source operand. | |||
2825 | /// \param __count | |||
2826 | /// An integer value specifying the number of bits to left-shift each value | |||
2827 | /// in operand \a __a. | |||
2828 | /// \returns A 128-bit integer vector containing the left-shifted values. | |||
2829 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, | |||
2830 | int __count) { | |||
2831 | return __builtin_ia32_psllqi128((__v2di)__a, __count); | |||
2832 | } | |||
2833 | ||||
2834 | /// Left-shifts each 64-bit value in the 128-bit integer vector operand | |||
2835 | /// by the specified number of bits. Low-order bits are cleared. | |||
2836 | /// | |||
2837 | /// \headerfile <x86intrin.h> | |||
2838 | /// | |||
2839 | /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. | |||
2840 | /// | |||
2841 | /// \param __a | |||
2842 | /// A 128-bit integer vector containing the source operand. | |||
2843 | /// \param __count | |||
2844 | /// A 128-bit integer vector in which bits [63:0] specify the number of bits | |||
2845 | /// to left-shift each value in operand \a __a. | |||
2846 | /// \returns A 128-bit integer vector containing the left-shifted values. | |||
2847 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, | |||
2848 | __m128i __count) { | |||
2849 | return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); | |||
2850 | } | |||
2851 | ||||
2852 | /// Right-shifts each 16-bit value in the 128-bit integer vector operand | |||
2853 | /// by the specified number of bits. High-order bits are filled with the sign | |||
2854 | /// bit of the initial value. | |||
2855 | /// | |||
2856 | /// \headerfile <x86intrin.h> | |||
2857 | /// | |||
2858 | /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. | |||
2859 | /// | |||
2860 | /// \param __a | |||
2861 | /// A 128-bit integer vector containing the source operand. | |||
2862 | /// \param __count | |||
2863 | /// An integer value specifying the number of bits to right-shift each value | |||
2864 | /// in operand \a __a. | |||
2865 | /// \returns A 128-bit integer vector containing the right-shifted values. | |||
2866 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, | |||
2867 | int __count) { | |||
2868 | return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); | |||
2869 | } | |||
2870 | ||||
2871 | /// Right-shifts each 16-bit value in the 128-bit integer vector operand | |||
2872 | /// by the specified number of bits. High-order bits are filled with the sign | |||
2873 | /// bit of the initial value. | |||
2874 | /// | |||
2875 | /// \headerfile <x86intrin.h> | |||
2876 | /// | |||
2877 | /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. | |||
2878 | /// | |||
2879 | /// \param __a | |||
2880 | /// A 128-bit integer vector containing the source operand. | |||
2881 | /// \param __count | |||
2882 | /// A 128-bit integer vector in which bits [63:0] specify the number of bits | |||
2883 | /// to right-shift each value in operand \a __a. | |||
2884 | /// \returns A 128-bit integer vector containing the right-shifted values. | |||
2885 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, | |||
2886 | __m128i __count) { | |||
2887 | return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); | |||
2888 | } | |||
2889 | ||||
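The immediate-count and vector-count forms above behave the same for in-range counts; the vector-count form is the one to reach for when the shift amount is only known at run time. A hedged sketch (helper name is illustrative):

#include <emmintrin.h>

/* Sketch: arithmetic right shift of eight signed 16-bit lanes by a runtime count. */
static __m128i sra16_by(__m128i v, int count)
{
    return _mm_sra_epi16(v, _mm_cvtsi32_si128(count));  /* count is read from bits [63:0] */
}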
2890 | /// Right-shifts each 32-bit value in the 128-bit integer vector operand | |||
2891 | /// by the specified number of bits. High-order bits are filled with the sign | |||
2892 | /// bit of the initial value. | |||
2893 | /// | |||
2894 | /// \headerfile <x86intrin.h> | |||
2895 | /// | |||
2896 | /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. | |||
2897 | /// | |||
2898 | /// \param __a | |||
2899 | /// A 128-bit integer vector containing the source operand. | |||
2900 | /// \param __count | |||
2901 | /// An integer value specifying the number of bits to right-shift each value | |||
2902 | /// in operand \a __a. | |||
2903 | /// \returns A 128-bit integer vector containing the right-shifted values. | |||
2904 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, | |||
2905 | int __count) { | |||
2906 | return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); | |||
2907 | } | |||
2908 | ||||
2909 | /// Right-shifts each 32-bit value in the 128-bit integer vector operand | |||
2910 | /// by the specified number of bits. High-order bits are filled with the sign | |||
2911 | /// bit of the initial value. | |||
2912 | /// | |||
2913 | /// \headerfile <x86intrin.h> | |||
2914 | /// | |||
2915 | /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. | |||
2916 | /// | |||
2917 | /// \param __a | |||
2918 | /// A 128-bit integer vector containing the source operand. | |||
2919 | /// \param __count | |||
2920 | /// A 128-bit integer vector in which bits [63:0] specify the number of bits | |||
2921 | /// to right-shift each value in operand \a __a. | |||
2922 | /// \returns A 128-bit integer vector containing the right-shifted values. | |||
2923 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, | |||
2924 | __m128i __count) { | |||
2925 | return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); | |||
2926 | } | |||
2927 | ||||
2928 | /// Right-shifts the 128-bit integer vector operand by the specified | |||
2929 | /// number of bytes. High-order bits are cleared. | |||
2930 | /// | |||
2931 | /// \headerfile <x86intrin.h> | |||
2932 | /// | |||
2933 | /// \code | |||
2934 | /// __m128i _mm_srli_si128(__m128i a, const int imm); | |||
2935 | /// \endcode | |||
2936 | /// | |||
2937 | /// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction. | |||
2938 | /// | |||
2939 | /// \param a | |||
2940 | /// A 128-bit integer vector containing the source operand. | |||
2941 | /// \param imm | |||
2942 | /// An immediate value specifying the number of bytes to right-shift operand | |||
2943 | /// \a a. | |||
2944 | /// \returns A 128-bit integer vector containing the right-shifted value. | |||
2945 | #define _mm_srli_si128(a, imm) \ | |||
2946 | ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ | |||
2947 | (int)(imm))) | |||
2948 | ||||
2949 | #define _mm_bsrli_si128(a, imm) \ | |||
2950 | ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ | |||
2951 | (int)(imm))) | |||
2952 | ||||
2953 | /// Right-shifts each of the 16-bit values in the 128-bit integer vector | |||
2954 | /// operand by the specified number of bits. High-order bits are cleared. | |||
2955 | /// | |||
2956 | /// \headerfile <x86intrin.h> | |||
2957 | /// | |||
2958 | /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. | |||
2959 | /// | |||
2960 | /// \param __a | |||
2961 | /// A 128-bit integer vector containing the source operand. | |||
2962 | /// \param __count | |||
2963 | /// An integer value specifying the number of bits to right-shift each value | |||
2964 | /// in operand \a __a. | |||
2965 | /// \returns A 128-bit integer vector containing the right-shifted values. | |||
2966 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, | |||
2967 | int __count) { | |||
2968 | return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); | |||
2969 | } | |||
2970 | ||||
2971 | /// Right-shifts each of the 16-bit values in the 128-bit integer vector | |||
2972 | /// operand by the specified number of bits. High-order bits are cleared. | |||
2973 | /// | |||
2974 | /// \headerfile <x86intrin.h> | |||
2975 | /// | |||
2976 | /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. | |||
2977 | /// | |||
2978 | /// \param __a | |||
2979 | /// A 128-bit integer vector containing the source operand. | |||
2980 | /// \param __count | |||
2981 | /// A 128-bit integer vector in which bits [63:0] specify the number of bits | |||
2982 | /// to right-shift each value in operand \a __a. | |||
2983 | /// \returns A 128-bit integer vector containing the right-shifted values. | |||
2984 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, | |||
2985 | __m128i __count) { | |||
2986 | return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); | |||
2987 | } | |||
2988 | ||||
2989 | /// Right-shifts each of the 32-bit values in the 128-bit integer vector | |||
2990 | /// operand by the specified number of bits. High-order bits are cleared. | |||
2991 | /// | |||
2992 | /// \headerfile <x86intrin.h> | |||
2993 | /// | |||
2994 | /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. | |||
2995 | /// | |||
2996 | /// \param __a | |||
2997 | /// A 128-bit integer vector containing the source operand. | |||
2998 | /// \param __count | |||
2999 | /// An integer value specifying the number of bits to right-shift each value | |||
3000 | /// in operand \a __a. | |||
3001 | /// \returns A 128-bit integer vector containing the right-shifted values. | |||
3002 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, | |||
3003 | int __count) { | |||
3004 | return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); | |||
3005 | } | |||
3006 | ||||
3007 | /// Right-shifts each of the 32-bit values in the 128-bit integer vector | |||
3008 | /// operand by the specified number of bits. High-order bits are cleared. | |||
3009 | /// | |||
3010 | /// \headerfile <x86intrin.h> | |||
3011 | /// | |||
3012 | /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. | |||
3013 | /// | |||
3014 | /// \param __a | |||
3015 | /// A 128-bit integer vector containing the source operand. | |||
3016 | /// \param __count | |||
3017 | /// A 128-bit integer vector in which bits [63:0] specify the number of bits | |||
3018 | /// to right-shift each value in operand \a __a. | |||
3019 | /// \returns A 128-bit integer vector containing the right-shifted values. | |||
3020 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, | |||
3021 | __m128i __count) { | |||
3022 | return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); | |||
3023 | } | |||
3024 | ||||
3025 | /// Right-shifts each of the 64-bit values in the 128-bit integer vector | |||
3026 | /// operand by the specified number of bits. High-order bits are cleared. | |||
3027 | /// | |||
3028 | /// \headerfile <x86intrin.h> | |||
3029 | /// | |||
3030 | /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. | |||
3031 | /// | |||
3032 | /// \param __a | |||
3033 | /// A 128-bit integer vector containing the source operand. | |||
3034 | /// \param __count | |||
3035 | /// An integer value specifying the number of bits to right-shift each value | |||
3036 | /// in operand \a __a. | |||
3037 | /// \returns A 128-bit integer vector containing the right-shifted values. | |||
3038 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, | |||
3039 | int __count) { | |||
3040 | return __builtin_ia32_psrlqi128((__v2di)__a, __count); | |||
3041 | } | |||
3042 | ||||
3043 | /// Right-shifts each of the 64-bit values in the 128-bit integer vector | |||
3044 | /// operand by the specified number of bits. High-order bits are cleared. | |||
3045 | /// | |||
3046 | /// \headerfile <x86intrin.h> | |||
3047 | /// | |||
3048 | /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. | |||
3049 | /// | |||
3050 | /// \param __a | |||
3051 | /// A 128-bit integer vector containing the source operand. | |||
3052 | /// \param __count | |||
3053 | /// A 128-bit integer vector in which bits [63:0] specify the number of bits | |||
3054 | /// to right-shift each value in operand \a __a. | |||
3055 | /// \returns A 128-bit integer vector containing the right-shifted values. | |||
3056 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, | |||
3057 | __m128i __count) { | |||
3058 | return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); | |||
3059 | } | |||
3060 | ||||
3061 | /// Compares each of the corresponding 8-bit values of the 128-bit | |||
3062 | /// integer vectors for equality. | |||
3063 | /// | |||
3064 | /// Each comparison returns 0x0 for false, 0xFF for true. | |||
3065 | /// | |||
3066 | /// \headerfile <x86intrin.h> | |||
3067 | /// | |||
3068 | /// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction. | |||
3069 | /// | |||
3070 | /// \param __a | |||
3071 | /// A 128-bit integer vector. | |||
3072 | /// \param __b | |||
3073 | /// A 128-bit integer vector. | |||
3074 | /// \returns A 128-bit integer vector containing the comparison results. | |||
3075 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, | |||
3076 | __m128i __b) { | |||
3077 | return (__m128i)((__v16qi)__a == (__v16qi)__b); | |||
3078 | } | |||
3079 | ||||
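Comparison results like this are usually reduced to a scalar mask with _mm_movemask_epi8 (defined elsewhere in this header). A hedged sketch of one 16-byte step of a byte search; the helper name is illustrative only:

#include <emmintrin.h>

/* Sketch: test whether any of the 16 bytes in `chunk` equals `value`. */
static int contains_byte(__m128i chunk, char value)
{
    __m128i hits = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(value)); /* 0xFF where equal */
    return _mm_movemask_epi8(hits) != 0;                        /* one mask bit per byte */
}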
3080 | /// Compares each of the corresponding 16-bit values of the 128-bit | |||
3081 | /// integer vectors for equality. | |||
3082 | /// | |||
3083 | /// Each comparison returns 0x0 for false, 0xFFFF for true. | |||
3084 | /// | |||
3085 | /// \headerfile <x86intrin.h> | |||
3086 | /// | |||
3087 | /// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction. | |||
3088 | /// | |||
3089 | /// \param __a | |||
3090 | /// A 128-bit integer vector. | |||
3091 | /// \param __b | |||
3092 | /// A 128-bit integer vector. | |||
3093 | /// \returns A 128-bit integer vector containing the comparison results. | |||
3094 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, | |||
3095 | __m128i __b) { | |||
3096 | return (__m128i)((__v8hi)__a == (__v8hi)__b); | |||
3097 | } | |||
3098 | ||||
3099 | /// Compares each of the corresponding 32-bit values of the 128-bit | |||
3100 | /// integer vectors for equality. | |||
3101 | /// | |||
3102 | /// Each comparison returns 0x0 for false, 0xFFFFFFFF for true. | |||
3103 | /// | |||
3104 | /// \headerfile <x86intrin.h> | |||
3105 | /// | |||
3106 | /// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction. | |||
3107 | /// | |||
3108 | /// \param __a | |||
3109 | /// A 128-bit integer vector. | |||
3110 | /// \param __b | |||
3111 | /// A 128-bit integer vector. | |||
3112 | /// \returns A 128-bit integer vector containing the comparison results. | |||
3113 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, | |||
3114 | __m128i __b) { | |||
3115 | return (__m128i)((__v4si)__a == (__v4si)__b); | |||
3116 | } | |||
3117 | ||||
3118 | /// Compares each of the corresponding signed 8-bit values of the 128-bit | |||
3119 | /// integer vectors to determine if the values in the first operand are | |||
3120 | /// greater than those in the second operand. | |||
3121 | /// | |||
3122 | /// Each comparison returns 0x0 for false, 0xFF for true. | |||
3123 | /// | |||
3124 | /// \headerfile <x86intrin.h> | |||
3125 | /// | |||
3126 | /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. | |||
3127 | /// | |||
3128 | /// \param __a | |||
3129 | /// A 128-bit integer vector. | |||
3130 | /// \param __b | |||
3131 | /// A 128-bit integer vector. | |||
3132 | /// \returns A 128-bit integer vector containing the comparison results. | |||
3133 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, | |||
3134 | __m128i __b) { | |||
3135 | /* This function always performs a signed comparison, but __v16qi is a char | |||
3136 | which may be signed or unsigned, so use __v16qs. */ | |||
3137 | return (__m128i)((__v16qs)__a > (__v16qs)__b); | |||
3138 | } | |||
3139 | ||||
3140 | /// Compares each of the corresponding signed 16-bit values of the | |||
3141 | /// 128-bit integer vectors to determine if the values in the first operand | |||
3142 | /// are greater than those in the second operand. | |||
3143 | /// | |||
3144 | /// Each comparison returns 0x0 for false, 0xFFFF for true. | |||
3145 | /// | |||
3146 | /// \headerfile <x86intrin.h> | |||
3147 | /// | |||
3148 | /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. | |||
3149 | /// | |||
3150 | /// \param __a | |||
3151 | /// A 128-bit integer vector. | |||
3152 | /// \param __b | |||
3153 | /// A 128-bit integer vector. | |||
3154 | /// \returns A 128-bit integer vector containing the comparison results. | |||
3155 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, | |||
3156 | __m128i __b) { | |||
3157 | return (__m128i)((__v8hi)__a > (__v8hi)__b); | |||
3158 | } | |||
3159 | ||||
3160 | /// Compares each of the corresponding signed 32-bit values of the | |||
3161 | /// 128-bit integer vectors to determine if the values in the first operand | |||
3162 | /// are greater than those in the second operand. | |||
3163 | /// | |||
3164 | /// Each comparison returns 0x0 for false, 0xFFFFFFFF for true. | |||
3165 | /// | |||
3166 | /// \headerfile <x86intrin.h> | |||
3167 | /// | |||
3168 | /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. | |||
3169 | /// | |||
3170 | /// \param __a | |||
3171 | /// A 128-bit integer vector. | |||
3172 | /// \param __b | |||
3173 | /// A 128-bit integer vector. | |||
3174 | /// \returns A 128-bit integer vector containing the comparison results. | |||
3175 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, | |||
3176 | __m128i __b) { | |||
3177 | return (__m128i)((__v4si)__a > (__v4si)__b); | |||
3178 | } | |||
3179 | ||||
3180 | /// Compares each of the corresponding signed 8-bit values of the 128-bit | |||
3181 | /// integer vectors to determine if the values in the first operand are less | |||
3182 | /// than those in the second operand. | |||
3183 | /// | |||
3184 | /// Each comparison returns 0x0 for false, 0xFF for true. | |||
3185 | /// | |||
3186 | /// \headerfile <x86intrin.h> | |||
3187 | /// | |||
3188 | /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. | |||
3189 | /// | |||
3190 | /// \param __a | |||
3191 | /// A 128-bit integer vector. | |||
3192 | /// \param __b | |||
3193 | /// A 128-bit integer vector. | |||
3194 | /// \returns A 128-bit integer vector containing the comparison results. | |||
3195 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, | |||
3196 | __m128i __b) { | |||
3197 | return _mm_cmpgt_epi8(__b, __a); | |||
3198 | } | |||
3199 | ||||
3200 | /// Compares each of the corresponding signed 16-bit values of the | |||
3201 | /// 128-bit integer vectors to determine if the values in the first operand | |||
3202 | /// are less than those in the second operand. | |||
3203 | /// | |||
3204 | /// Each comparison returns 0x0 for false, 0xFFFF for true. | |||
3205 | /// | |||
3206 | /// \headerfile <x86intrin.h> | |||
3207 | /// | |||
3208 | /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. | |||
3209 | /// | |||
3210 | /// \param __a | |||
3211 | /// A 128-bit integer vector. | |||
3212 | /// \param __b | |||
3213 | /// A 128-bit integer vector. | |||
3214 | /// \returns A 128-bit integer vector containing the comparison results. | |||
3215 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, | |||
3216 | __m128i __b) { | |||
3217 | return _mm_cmpgt_epi16(__b, __a); | |||
3218 | } | |||
3219 | ||||
3220 | /// Compares each of the corresponding signed 32-bit values of the | |||
3221 | /// 128-bit integer vectors to determine if the values in the first operand | |||
3222 | /// are less than those in the second operand. | |||
3223 | /// | |||
3224 | /// Each comparison returns 0x0 for false, 0xFFFFFFFF for true. | |||
3225 | /// | |||
3226 | /// \headerfile <x86intrin.h> | |||
3227 | /// | |||
3228 | /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. | |||
3229 | /// | |||
3230 | /// \param __a | |||
3231 | /// A 128-bit integer vector. | |||
3232 | /// \param __b | |||
3233 | /// A 128-bit integer vector. | |||
3234 | /// \returns A 128-bit integer vector containing the comparison results. | |||
3235 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, | |||
3236 | __m128i __b) { | |||
3237 | return _mm_cmpgt_epi32(__b, __a); | |||
3238 | } | |||
3239 | ||||
3240 | #ifdef __x86_64__ | |||
3241 | /// Converts a 64-bit signed integer value from the second operand into a | |||
3242 | /// double-precision value and returns it in the lower element of a [2 x | |||
3243 | /// double] vector; the upper element of the returned vector is copied from | |||
3244 | /// the upper element of the first operand. | |||
3245 | /// | |||
3246 | /// \headerfile <x86intrin.h> | |||
3247 | /// | |||
3248 | /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. | |||
3249 | /// | |||
3250 | /// \param __a | |||
3251 | /// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are | |||
3252 | /// copied to the upper 64 bits of the destination. | |||
3253 | /// \param __b | |||
3254 | /// A 64-bit signed integer operand containing the value to be converted. | |||
3255 | /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the | |||
3256 | /// converted value of the second operand. The upper 64 bits are copied from | |||
3257 | /// the upper 64 bits of the first operand. | |||
3258 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi64_sd(__m128d __a, | |||
3259 | long long __b) { | |||
3260 | __a[0] = __b; | |||
3261 | return __a; | |||
3262 | } | |||
3263 | ||||
3264 | /// Converts the first (lower) element of a vector of [2 x double] into a | |||
3265 | /// 64-bit signed integer value. | |||
3266 | /// | |||
3267 | /// If the converted value does not fit in a 64-bit integer, raises a | |||
3268 | /// floating-point invalid exception. If the exception is masked, returns | |||
3269 | /// the most negative integer. | |||
3270 | /// | |||
3271 | /// \headerfile <x86intrin.h> | |||
3272 | /// | |||
3273 | /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. | |||
3274 | /// | |||
3275 | /// \param __a | |||
3276 | /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the | |||
3277 | /// conversion. | |||
3278 | /// \returns A 64-bit signed integer containing the converted value. | |||
3279 | static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsd_si64(__m128d __a) { | |||
3280 | return __builtin_ia32_cvtsd2si64((__v2df)__a); | |||
3281 | } | |||
3282 | ||||
3283 | /// Converts the first (lower) element of a vector of [2 x double] into a | |||
3284 | /// 64-bit signed truncated (rounded toward zero) integer value. | |||
3285 | /// | |||
3286 | /// If a converted value does not fit in a 64-bit integer, raises a | |||
3287 | /// floating-point invalid exception. If the exception is masked, returns | |||
3288 | /// the most negative integer. | |||
3289 | /// | |||
3290 | /// \headerfile <x86intrin.h> | |||
3291 | /// | |||
3292 | /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> | |||
3293 | /// instruction. | |||
3294 | /// | |||
3295 | /// \param __a | |||
3296 | /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the | |||
3297 | /// conversion. | |||
3298 | /// \returns A 64-bit signed integer containing the converted value. | |||
3299 | static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) { | |||
3300 | return __builtin_ia32_cvttsd2si64((__v2df)__a); | |||
3301 | } | |||
3302 | #endif | |||
3303 | ||||
3304 | /// Converts a vector of [4 x i32] into a vector of [4 x float]. | |||
3305 | /// | |||
3306 | /// \headerfile <x86intrin.h> | |||
3307 | /// | |||
3308 | /// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction. | |||
3309 | /// | |||
3310 | /// \param __a | |||
3311 | /// A 128-bit integer vector. | |||
3312 | /// \returns A 128-bit vector of [4 x float] containing the converted values. | |||
3313 | static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a) { | |||
3314 | return (__m128) __builtin_convertvector((__v4si)__a, __v4sf); | |||
3315 | } | |||
3316 | ||||
3317 | /// Converts a vector of [4 x float] into a vector of [4 x i32]. | |||
3318 | /// | |||
3319 | /// If a converted value does not fit in a 32-bit integer, raises a | |||
3320 | /// floating-point invalid exception. If the exception is masked, returns | |||
3321 | /// the most negative integer. | |||
3322 | /// | |||
3323 | /// \headerfile <x86intrin.h> | |||
3324 | /// | |||
3325 | /// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction. | |||
3326 | /// | |||
3327 | /// \param __a | |||
3328 | /// A 128-bit vector of [4 x float]. | |||
3329 | /// \returns A 128-bit integer vector of [4 x i32] containing the converted | |||
3330 | /// values. | |||
3331 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a) { | |||
3332 | return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); | |||
3333 | } | |||
3334 | ||||
3335 | /// Converts a vector of [4 x float] into four signed truncated (rounded toward | |||
3336 | /// zero) 32-bit integers, returned in a vector of [4 x i32]. | |||
3337 | /// | |||
3338 | /// If a converted value does not fit in a 32-bit integer, raises a | |||
3339 | /// floating-point invalid exception. If the exception is masked, returns | |||
3340 | /// the most negative integer. | |||
3341 | /// | |||
3342 | /// \headerfile <x86intrin.h> | |||
3343 | /// | |||
3344 | /// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> | |||
3345 | /// instruction. | |||
3346 | /// | |||
3347 | /// \param __a | |||
3348 | /// A 128-bit vector of [4 x float]. | |||
3349 | /// \returns A 128-bit vector of [4 x i32] containing the converted values. | |||
3350 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { | |||
3351 | return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); | |||
3352 | } | |||
3353 | ||||
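The two float-to-int conversions above differ only in rounding: _mm_cvtps_epi32 uses the current rounding mode (round-to-nearest-even by default), while _mm_cvttps_epi32 truncates toward zero. An illustrative sketch; the helper name and sample values are made up:

#include <emmintrin.h>
#include <xmmintrin.h>

/* Sketch: with default rounding, 2.7f becomes 3 via _mm_cvtps_epi32
 * but 2 via _mm_cvttps_epi32. */
static void convert_both(int rounded[4], int truncated[4])
{
    __m128 v = _mm_set_ps(-1.5f, 2.5f, 2.7f, 0.3f);
    _mm_storeu_si128((__m128i *)rounded,   _mm_cvtps_epi32(v));
    _mm_storeu_si128((__m128i *)truncated, _mm_cvttps_epi32(v));
}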
3354 | /// Returns a vector of [4 x i32] where the lowest element is the input | |||
3355 | /// operand and the remaining elements are zero. | |||
3356 | /// | |||
3357 | /// \headerfile <x86intrin.h> | |||
3358 | /// | |||
3359 | /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. | |||
3360 | /// | |||
3361 | /// \param __a | |||
3362 | /// A 32-bit signed integer operand. | |||
3363 | /// \returns A 128-bit vector of [4 x i32]. | |||
3364 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) { | |||
3365 | return __extension__(__m128i)(__v4si){__a, 0, 0, 0}; | |||
3366 | } | |||
3367 | ||||
3368 | /// Returns a vector of [2 x i64] where the lower element is the input | |||
3369 | /// operand and the upper element is zero. | |||
3370 | /// | |||
3371 | /// \headerfile <x86intrin.h> | |||
3372 | /// | |||
3373 | /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction | |||
3374 | /// in 64-bit mode. | |||
3375 | /// | |||
3376 | /// \param __a | |||
3377 | /// A 64-bit signed integer operand containing the value to be converted. | |||
3378 | /// \returns A 128-bit vector of [2 x i64] containing the converted value. | |||
3379 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { | |||
3380 | return __extension__(__m128i)(__v2di){__a, 0}; | |||
3381 | } | |||
3382 | ||||
3383 | /// Moves the least significant 32 bits of a vector of [4 x i32] to a | |||
3384 | /// 32-bit signed integer value. | |||
3385 | /// | |||
3386 | /// \headerfile <x86intrin.h> | |||
3387 | /// | |||
3388 | /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. | |||
3389 | /// | |||
3390 | /// \param __a | |||
3391 | /// A vector of [4 x i32]. The least significant 32 bits are moved to the | |||
3392 | /// destination. | |||
3393 | /// \returns A 32-bit signed integer containing the moved value. | |||
3394 | static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) { | |||
3395 | __v4si __b = (__v4si)__a; | |||
3396 | return __b[0]; | |||
3397 | } | |||
3398 | ||||
3399 | /// Moves the least significant 64 bits of a vector of [2 x i64] to a | |||
3400 | /// 64-bit signed integer value. | |||
3401 | /// | |||
3402 | /// \headerfile <x86intrin.h> | |||
3403 | /// | |||
3404 | /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. | |||
3405 | /// | |||
3406 | /// \param __a | |||
3407 | /// A vector of [2 x i64]. The least significant 64 bits are moved to the | |||
3408 | /// destination. | |||
3409 | /// \returns A 64-bit signed integer containing the moved value. | |||
3410 | static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) { | |||
3411 | return __a[0]; | |||
3412 | } | |||
3413 | ||||
3414 | /// Moves packed integer values from an aligned 128-bit memory location | |||
3415 | /// to elements in a 128-bit integer vector. | |||
3416 | /// | |||
3417 | /// \headerfile <x86intrin.h> | |||
3418 | /// | |||
3419 | /// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction. | |||
3420 | /// | |||
3421 | /// \param __p | |||
3422 | /// An aligned pointer to a memory location containing integer values. | |||
3423 | /// \returns A 128-bit integer vector containing the moved values. | |||
3424 | static __inline__ __m128i __DEFAULT_FN_ATTRS | |||
3425 | _mm_load_si128(__m128i const *__p) { | |||
3426 | return *__p; | |||
3427 | } | |||
3428 | ||||
3429 | /// Moves packed integer values from an unaligned 128-bit memory location | |||
3430 | /// to elements in a 128-bit integer vector. | |||
3431 | /// | |||
3432 | /// \headerfile <x86intrin.h> | |||
3433 | /// | |||
3434 | /// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction. | |||
3435 | /// | |||
3436 | /// \param __p | |||
3437 | /// A pointer to a memory location containing integer values. | |||
3438 | /// \returns A 128-bit integer vector containing the moved values. | |||
3439 | static __inline__ __m128i __DEFAULT_FN_ATTRS | |||
3440 | _mm_loadu_si128(__m128i_u const *__p) { | |||
3441 | struct __loadu_si128 { | |||
3442 | __m128i_u __v; | |||
3443 | } __attribute__((__packed__, __may_alias__)); | |||
3444 | return ((const struct __loadu_si128 *)__p)->__v; | |||
3445 | } | |||
3446 | ||||
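This is the load flagged in the analyzer warning at the top of this report: the function dereferences __p unconditionally, so if the reported path is real, the null value originates in the caller rather than in the header itself. As a usage reminder (illustrative sketch only), the pointer may be unaligned but must be non-null and address 16 readable bytes:

#include <emmintrin.h>
#include <stdint.h>

/* Sketch: load 16 bytes from a possibly unaligned, but valid, source pointer. */
static __m128i load_16_bytes(const uint32_t *src)
{
    return _mm_loadu_si128((const __m128i_u *)src);
}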
3447 | /// Returns a vector of [2 x i64] where the lower element is taken from | |||
3448 | /// the lower element of the operand, and the upper element is zero. | |||
3449 | /// | |||
3450 | /// \headerfile <x86intrin.h> | |||
3451 | /// | |||
3452 | /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. | |||
3453 | /// | |||
3454 | /// \param __p | |||
3455 | /// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of | |||
3456 | /// the destination. | |||
3457 | /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the | |||
3458 | /// moved value. The higher order bits are cleared. | |||
3459 | static __inline__ __m128i __DEFAULT_FN_ATTRS | |||
3460 | _mm_loadl_epi64(__m128i_u const *__p) { | |||
3461 | struct __mm_loadl_epi64_struct { | |||
3462 | long long __u; | |||
3463 | } __attribute__((__packed__, __may_alias__)); | |||
3464 | return __extension__(__m128i){ | |||
3465 | ((const struct __mm_loadl_epi64_struct *)__p)->__u, 0}; | |||
3466 | } | |||
3467 | ||||
3468 | /// Generates a 128-bit vector of [4 x i32] with unspecified content. | |||
3469 | /// This could be used as an argument to another intrinsic function where the | |||
3470 | /// argument is required but the value is not actually used. | |||
3471 | /// | |||
3472 | /// \headerfile <x86intrin.h> | |||
3473 | /// | |||
3474 | /// This intrinsic has no corresponding instruction. | |||
3475 | /// | |||
3476 | /// \returns A 128-bit vector of [4 x i32] with unspecified content. | |||
3477 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) { | |||
3478 | return (__m128i)__builtin_ia32_undef128(); | |||
3479 | } | |||
3480 | ||||
3481 | /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with | |||
3482 | /// the specified 64-bit integer values. | |||
3483 | /// | |||
3484 | /// \headerfile <x86intrin.h> | |||
3485 | /// | |||
3486 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3487 | /// instruction. | |||
3488 | /// | |||
3489 | /// \param __q1 | |||
3490 | /// A 64-bit integer value used to initialize the upper 64 bits of the | |||
3491 | /// destination vector of [2 x i64]. | |||
3492 | /// \param __q0 | |||
3493 | /// A 64-bit integer value used to initialize the lower 64 bits of the | |||
3494 | /// destination vector of [2 x i64]. | |||
3495 | /// \returns An initialized 128-bit vector of [2 x i64] containing the values | |||
3496 | /// provided in the operands. | |||
3497 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, | |||
3498 | long long __q0) { | |||
3499 | return __extension__(__m128i)(__v2di){__q0, __q1}; | |||
3500 | } | |||
3501 | ||||
3502 | /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with | |||
3503 | /// the specified 64-bit integer values. | |||
3504 | /// | |||
3505 | /// \headerfile <x86intrin.h> | |||
3506 | /// | |||
3507 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3508 | /// instruction. | |||
3509 | /// | |||
3510 | /// \param __q1 | |||
3511 | /// A 64-bit integer value used to initialize the upper 64 bits of the | |||
3512 | /// destination vector of [2 x i64]. | |||
3513 | /// \param __q0 | |||
3514 | /// A 64-bit integer value used to initialize the lower 64 bits of the | |||
3515 | /// destination vector of [2 x i64]. | |||
3516 | /// \returns An initialized 128-bit vector of [2 x i64] containing the values | |||
3517 | /// provided in the operands. | |||
3518 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, | |||
3519 | __m64 __q0) { | |||
3520 | return _mm_set_epi64x((long long)__q1, (long long)__q0); | |||
3521 | } | |||
3522 | ||||
3523 | /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with | |||
3524 | /// the specified 32-bit integer values. | |||
3525 | /// | |||
3526 | /// \headerfile <x86intrin.h> | |||
3527 | /// | |||
3528 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3529 | /// instruction. | |||
3530 | /// | |||
3531 | /// \param __i3 | |||
3532 | /// A 32-bit integer value used to initialize bits [127:96] of the | |||
3533 | /// destination vector. | |||
3534 | /// \param __i2 | |||
3535 | /// A 32-bit integer value used to initialize bits [95:64] of the destination | |||
3536 | /// vector. | |||
3537 | /// \param __i1 | |||
3538 | /// A 32-bit integer value used to initialize bits [63:32] of the destination | |||
3539 | /// vector. | |||
3540 | /// \param __i0 | |||
3541 | /// A 32-bit integer value used to initialize bits [31:0] of the destination | |||
3542 | /// vector. | |||
3543 | /// \returns An initialized 128-bit vector of [4 x i32] containing the values | |||
3544 | /// provided in the operands. | |||
3545 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, | |||
3546 | int __i1, int __i0) { | |||
3547 | return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3}; | |||
3548 | } | |||
3549 | ||||
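One easy-to-miss detail (noted here as an aside): the _mm_set_* constructors take their arguments from the most significant element down to the least significant, which is the reverse of memory order. A minimal sketch with an illustrative helper name:

#include <emmintrin.h>
#include <stdint.h>

/* Sketch: the last argument of _mm_set_epi32 becomes element 0 (lowest address when stored). */
static void set_order_demo(uint32_t out[4])
{
    __m128i v = _mm_set_epi32(3, 2, 1, 0);   /* element i holds the value i */
    _mm_storeu_si128((__m128i *)out, v);     /* out == {0, 1, 2, 3} */
}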
3550 | /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with | |||
3551 | /// the specified 16-bit integer values. | |||
3552 | /// | |||
3553 | /// \headerfile <x86intrin.h> | |||
3554 | /// | |||
3555 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3556 | /// instruction. | |||
3557 | /// | |||
3558 | /// \param __w7 | |||
3559 | /// A 16-bit integer value used to initialize bits [127:112] of the | |||
3560 | /// destination vector. | |||
3561 | /// \param __w6 | |||
3562 | /// A 16-bit integer value used to initialize bits [111:96] of the | |||
3563 | /// destination vector. | |||
3564 | /// \param __w5 | |||
3565 | /// A 16-bit integer value used to initialize bits [95:80] of the destination | |||
3566 | /// vector. | |||
3567 | /// \param __w4 | |||
3568 | /// A 16-bit integer value used to initialize bits [79:64] of the destination | |||
3569 | /// vector. | |||
3570 | /// \param __w3 | |||
3571 | /// A 16-bit integer value used to initialize bits [63:48] of the destination | |||
3572 | /// vector. | |||
3573 | /// \param __w2 | |||
3574 | /// A 16-bit integer value used to initialize bits [47:32] of the destination | |||
3575 | /// vector. | |||
3576 | /// \param __w1 | |||
3577 | /// A 16-bit integer value used to initialize bits [31:16] of the destination | |||
3578 | /// vector. | |||
3579 | /// \param __w0 | |||
3580 | /// A 16-bit integer value used to initialize bits [15:0] of the destination | |||
3581 | /// vector. | |||
3582 | /// \returns An initialized 128-bit vector of [8 x i16] containing the values | |||
3583 | /// provided in the operands. | |||
3584 | static __inline__ __m128i __DEFAULT_FN_ATTRS | |||
3585 | _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, | |||
3586 | short __w2, short __w1, short __w0) { | |||
3587 | return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3, | |||
3588 | __w4, __w5, __w6, __w7}; | |||
3589 | } | |||
3590 | ||||
3591 | /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with | |||
3592 | /// the specified 8-bit integer values. | |||
3593 | /// | |||
3594 | /// \headerfile <x86intrin.h> | |||
3595 | /// | |||
3596 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3597 | /// instruction. | |||
3598 | /// | |||
3599 | /// \param __b15 | |||
3600 | /// Initializes bits [127:120] of the destination vector. | |||
3601 | /// \param __b14 | |||
3602 | /// Initializes bits [119:112] of the destination vector. | |||
3603 | /// \param __b13 | |||
3604 | /// Initializes bits [111:104] of the destination vector. | |||
3605 | /// \param __b12 | |||
3606 | /// Initializes bits [103:96] of the destination vector. | |||
3607 | /// \param __b11 | |||
3608 | /// Initializes bits [95:88] of the destination vector. | |||
3609 | /// \param __b10 | |||
3610 | /// Initializes bits [87:80] of the destination vector. | |||
3611 | /// \param __b9 | |||
3612 | /// Initializes bits [79:72] of the destination vector. | |||
3613 | /// \param __b8 | |||
3614 | /// Initializes bits [71:64] of the destination vector. | |||
3615 | /// \param __b7 | |||
3616 | /// Initializes bits [63:56] of the destination vector. | |||
3617 | /// \param __b6 | |||
3618 | /// Initializes bits [55:48] of the destination vector. | |||
3619 | /// \param __b5 | |||
3620 | /// Initializes bits [47:40] of the destination vector. | |||
3621 | /// \param __b4 | |||
3622 | /// Initializes bits [39:32] of the destination vector. | |||
3623 | /// \param __b3 | |||
3624 | /// Initializes bits [31:24] of the destination vector. | |||
3625 | /// \param __b2 | |||
3626 | /// Initializes bits [23:16] of the destination vector. | |||
3627 | /// \param __b1 | |||
3628 | /// Initializes bits [15:8] of the destination vector. | |||
3629 | /// \param __b0 | |||
3630 | /// Initializes bits [7:0] of the destination vector. | |||
3631 | /// \returns An initialized 128-bit vector of [16 x i8] containing the values | |||
3632 | /// provided in the operands. | |||
3633 | static __inline__ __m128i __DEFAULT_FN_ATTRS | |||
3634 | _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, | |||
3635 | char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, | |||
3636 | char __b4, char __b3, char __b2, char __b1, char __b0) { | |||
3637 | return __extension__(__m128i)(__v16qi){ | |||
3638 | __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, | |||
3639 | __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15}; | |||
3640 | } | |||
3641 | ||||
3642 | /// Initializes both values in a 128-bit integer vector with the | |||
3643 | /// specified 64-bit integer value. | |||
3644 | /// | |||
3645 | /// \headerfile <x86intrin.h> | |||
3646 | /// | |||
3647 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3648 | /// instruction. | |||
3649 | /// | |||
3650 | /// \param __q | |||
3651 | /// Integer value used to initialize the elements of the destination integer | |||
3652 | /// vector. | |||
3653 | /// \returns An initialized 128-bit integer vector of [2 x i64] with both | |||
3654 | /// elements containing the value provided in the operand. | |||
3655 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { | |||
3656 | return _mm_set_epi64x(__q, __q); | |||
3657 | } | |||
3658 | ||||
3659 | /// Initializes both values in a 128-bit vector of [2 x i64] with the | |||
3660 | /// specified 64-bit value. | |||
3661 | /// | |||
3662 | /// \headerfile <x86intrin.h> | |||
3663 | /// | |||
3664 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3665 | /// instruction. | |||
3666 | /// | |||
3667 | /// \param __q | |||
3668 | /// A 64-bit value used to initialize the elements of the destination integer | |||
3669 | /// vector. | |||
3670 | /// \returns An initialized 128-bit vector of [2 x i64] with all elements | |||
3671 | /// containing the value provided in the operand. | |||
3672 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { | |||
3673 | return _mm_set_epi64(__q, __q); | |||
3674 | } | |||
3675 | ||||
3676 | /// Initializes all values in a 128-bit vector of [4 x i32] with the | |||
3677 | /// specified 32-bit value. | |||
3678 | /// | |||
3679 | /// \headerfile <x86intrin.h> | |||
3680 | /// | |||
3681 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3682 | /// instruction. | |||
3683 | /// | |||
3684 | /// \param __i | |||
3685 | /// A 32-bit value used to initialize the elements of the destination integer | |||
3686 | /// vector. | |||
3687 | /// \returns An initialized 128-bit vector of [4 x i32] with all elements | |||
3688 | /// containing the value provided in the operand. | |||
3689 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { | |||
3690 | return _mm_set_epi32(__i, __i, __i, __i); | |||
3691 | } | |||
3692 | ||||
3693 | /// Initializes all values in a 128-bit vector of [8 x i16] with the | |||
3694 | /// specified 16-bit value. | |||
3695 | /// | |||
3696 | /// \headerfile <x86intrin.h> | |||
3697 | /// | |||
3698 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3699 | /// instruction. | |||
3700 | /// | |||
3701 | /// \param __w | |||
3702 | /// A 16-bit value used to initialize the elements of the destination integer | |||
3703 | /// vector. | |||
3704 | /// \returns An initialized 128-bit vector of [8 x i16] with all elements | |||
3705 | /// containing the value provided in the operand. | |||
3706 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { | |||
3707 | return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); | |||
3708 | } | |||
3709 | ||||
3710 | /// Initializes all values in a 128-bit vector of [16 x i8] with the | |||
3711 | /// specified 8-bit value. | |||
3712 | /// | |||
3713 | /// \headerfile <x86intrin.h> | |||
3714 | /// | |||
3715 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3716 | /// instruction. | |||
3717 | /// | |||
3718 | /// \param __b | |||
3719 | /// An 8-bit value used to initialize the elements of the destination integer | |||
3720 | /// vector. | |||
3721 | /// \returns An initialized 128-bit vector of [16 x i8] with all elements | |||
3722 | /// containing the value provided in the operand. | |||
3723 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { | |||
3724 | return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, | |||
3725 | __b, __b, __b, __b, __b); | |||
3726 | } | |||
3727 | ||||
3728 | /// Constructs a 128-bit integer vector, initialized in reverse order | |||
3729 | /// with the specified 64-bit integral values. | |||
3730 | /// | |||
3731 | /// \headerfile <x86intrin.h> | |||
3732 | /// | |||
3733 | /// This intrinsic does not correspond to a specific instruction. | |||
3734 | /// | |||
3735 | /// \param __q0 | |||
3736 | /// A 64-bit integral value used to initialize the lower 64 bits of the | |||
3737 | /// result. | |||
3738 | /// \param __q1 | |||
3739 | /// A 64-bit integral value used to initialize the upper 64 bits of the | |||
3740 | /// result. | |||
3741 | /// \returns An initialized 128-bit integer vector. | |||
3742 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, | |||
3743 | __m64 __q1) { | |||
3744 | return _mm_set_epi64(__q1, __q0); | |||
3745 | } | |||
3746 | ||||
3747 | /// Constructs a 128-bit integer vector, initialized in reverse order | |||
3748 | /// with the specified 32-bit integral values. | |||
3749 | /// | |||
3750 | /// \headerfile <x86intrin.h> | |||
3751 | /// | |||
3752 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3753 | /// instruction. | |||
3754 | /// | |||
3755 | /// \param __i0 | |||
3756 | /// A 32-bit integral value used to initialize bits [31:0] of the result. | |||
3757 | /// \param __i1 | |||
3758 | /// A 32-bit integral value used to initialize bits [63:32] of the result. | |||
3759 | /// \param __i2 | |||
3760 | /// A 32-bit integral value used to initialize bits [95:64] of the result. | |||
3761 | /// \param __i3 | |||
3762 | /// A 32-bit integral value used to initialize bits [127:96] of the result. | |||
3763 | /// \returns An initialized 128-bit integer vector. | |||
3764 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, | |||
3765 | int __i2, | |||
3766 | int __i3) { | |||
3767 | return _mm_set_epi32(__i3, __i2, __i1, __i0); | |||
3768 | } | |||
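/* Illustrative note (hypothetical helper, assumes <emmintrin.h>):
 * _mm_setr_epi32 is simply _mm_set_epi32 with the argument order reversed,
 * so the two calls below build identical vectors. */
static inline void example_set_vs_setr(__m128i *a, __m128i *b) {
  *a = _mm_set_epi32(3, 2, 1, 0);  /* element 0 = 0, element 3 = 3      */
  *b = _mm_setr_epi32(0, 1, 2, 3); /* same layout, arguments low-to-high */
}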
3769 | ||||
3770 | /// Constructs a 128-bit integer vector, initialized in reverse order | |||
3771 | /// with the specified 16-bit integral values. | |||
3772 | /// | |||
3773 | /// \headerfile <x86intrin.h> | |||
3774 | /// | |||
3775 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3776 | /// instruction. | |||
3777 | /// | |||
3778 | /// \param __w0 | |||
3779 | /// A 16-bit integral value used to initialize bits [15:0] of the result. | |||
3780 | /// \param __w1 | |||
3781 | /// A 16-bit integral value used to initialize bits [31:16] of the result. | |||
3782 | /// \param __w2 | |||
3783 | /// A 16-bit integral value used to initialize bits [47:32] of the result. | |||
3784 | /// \param __w3 | |||
3785 | /// A 16-bit integral value used to initialize bits [63:48] of the result. | |||
3786 | /// \param __w4 | |||
3787 | /// A 16-bit integral value used to initialize bits [79:64] of the result. | |||
3788 | /// \param __w5 | |||
3789 | /// A 16-bit integral value used to initialize bits [95:80] of the result. | |||
3790 | /// \param __w6 | |||
3791 | /// A 16-bit integral value used to initialize bits [111:96] of the result. | |||
3792 | /// \param __w7 | |||
3793 | /// A 16-bit integral value used to initialize bits [127:112] of the result. | |||
3794 | /// \returns An initialized 128-bit integer vector. | |||
3795 | static __inline__ __m128i __DEFAULT_FN_ATTRS | |||
3796 | _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, | |||
3797 | short __w5, short __w6, short __w7) { | |||
3798 | return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); | |||
3799 | } | |||
3800 | ||||
3801 | /// Constructs a 128-bit integer vector, initialized in reverse order | |||
3802 | /// with the specified 8-bit integral values. | |||
3803 | /// | |||
3804 | /// \headerfile <x86intrin.h> | |||
3805 | /// | |||
3806 | /// This intrinsic is a utility function and does not correspond to a specific | |||
3807 | /// instruction. | |||
3808 | /// | |||
3809 | /// \param __b0 | |||
3810 | /// An 8-bit integral value used to initialize bits [7:0] of the result. | |||
3811 | /// \param __b1 | |||
3812 | /// An 8-bit integral value used to initialize bits [15:8] of the result. | |||
3813 | /// \param __b2 | |||
3814 | /// An 8-bit integral value used to initialize bits [23:16] of the result. | |||
3815 | /// \param __b3 | |||
3816 | /// An 8-bit integral value used to initialize bits [31:24] of the result. | |||
3817 | /// \param __b4 | |||
3818 | /// An 8-bit integral value used to initialize bits [39:32] of the result. | |||
3819 | /// \param __b5 | |||
3820 | /// An 8-bit integral value used to initialize bits [47:40] of the result. | |||
3821 | /// \param __b6 | |||
3822 | /// An 8-bit integral value used to initialize bits [55:48] of the result. | |||
3823 | /// \param __b7 | |||
3824 | /// An 8-bit integral value used to initialize bits [63:56] of the result. | |||
3825 | /// \param __b8 | |||
3826 | /// An 8-bit integral value used to initialize bits [71:64] of the result. | |||
3827 | /// \param __b9 | |||
3828 | /// An 8-bit integral value used to initialize bits [79:72] of the result. | |||
3829 | /// \param __b10 | |||
3830 | /// An 8-bit integral value used to initialize bits [87:80] of the result. | |||
3831 | /// \param __b11 | |||
3832 | /// An 8-bit integral value used to initialize bits [95:88] of the result. | |||
3833 | /// \param __b12 | |||
3834 | /// An 8-bit integral value used to initialize bits [103:96] of the result. | |||
3835 | /// \param __b13 | |||
3836 | /// An 8-bit integral value used to initialize bits [111:104] of the result. | |||
3837 | /// \param __b14 | |||
3838 | /// An 8-bit integral value used to initialize bits [119:112] of the result. | |||
3839 | /// \param __b15 | |||
3840 | /// An 8-bit integral value used to initialize bits [127:120] of the result. | |||
3841 | /// \returns An initialized 128-bit integer vector. | |||
3842 | static __inline__ __m128i __DEFAULT_FN_ATTRS | |||
3843 | _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, | |||
3844 | char __b6, char __b7, char __b8, char __b9, char __b10, | |||
3845 | char __b11, char __b12, char __b13, char __b14, char __b15) { | |||
3846 | return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, | |||
3847 | __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); | |||
3848 | } | |||
3849 | ||||
3850 | /// Creates a 128-bit integer vector initialized to zero. | |||
3851 | /// | |||
3852 | /// \headerfile <x86intrin.h> | |||
3853 | /// | |||
3854 | /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. | |||
3855 | /// | |||
3856 | /// \returns An initialized 128-bit integer vector with all elements set to | |||
3857 | /// zero. | |||
3858 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void) { | |||
3859 | return __extension__(__m128i)(__v2di){0LL, 0LL}; | |||
3860 | } | |||
3861 | ||||
3862 | /// Stores a 128-bit integer vector to a memory location aligned on a | |||
3863 | /// 128-bit boundary. | |||
3864 | /// | |||
3865 | /// \headerfile <x86intrin.h> | |||
3866 | /// | |||
3867 | /// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction. | |||
3868 | /// | |||
3869 | /// \param __p | |||
3870 | /// A pointer to an aligned memory location that will receive the integer | |||
3871 | /// values. | |||
3872 | /// \param __b | |||
3873 | /// A 128-bit integer vector containing the values to be moved. | |||
3874 | static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, | |||
3875 | __m128i __b) { | |||
3876 | *__p = __b; | |||
3877 | } | |||
3878 | ||||
3879 | /// Stores a 128-bit integer vector to an unaligned memory location. | |||
3880 | /// | |||
3881 | /// \headerfile <x86intrin.h> | |||
3882 | /// | |||
3883 | /// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction. | |||
3884 | /// | |||
3885 | /// \param __p | |||
3886 | /// A pointer to a memory location that will receive the integer values. | |||
3887 | /// \param __b | |||
3888 | /// A 128-bit integer vector containing the values to be moved. | |||
3889 | static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i_u *__p, | |||
3890 | __m128i __b) { | |||
3891 | struct __storeu_si128 { | |||
3892 | __m128i_u __v; | |||
3893 | } __attribute__((__packed__, __may_alias__)); | |||
3894 | ((struct __storeu_si128 *)__p)->__v = __b; | |||
3895 | } | |||
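/* Minimal sketch (illustrative): _mm_store_si128 requires a 16-byte-aligned
 * destination, while _mm_storeu_si128 accepts any address. Buffer names are
 * hypothetical; assumes <emmintrin.h> and a C11 compiler for _Alignas. */
static inline void example_store_aligned_vs_unaligned(void) {
  _Alignas(16) int aligned_buf[4];
  char raw[17];
  __m128i v = _mm_set1_epi32(42);
  _mm_store_si128((__m128i *)aligned_buf, v);  /* alignment guaranteed    */
  _mm_storeu_si128((__m128i_u *)(raw + 1), v); /* deliberately misaligned */
}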
3896 | ||||
3897 | /// Stores a 64-bit integer value from the low element of a 128-bit integer | |||
3898 | /// vector. | |||
3899 | /// | |||
3900 | /// \headerfile <x86intrin.h> | |||
3901 | /// | |||
3902 | /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. | |||
3903 | /// | |||
3904 | /// \param __p | |||
3905 | /// A pointer to a 64-bit memory location. The address of the memory | |||
3906 | /// location does not have to be aligned. | |||
3907 | /// \param __b | |||
3908 | /// A 128-bit integer vector containing the value to be stored. | |||
3909 | static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si64(void *__p, | |||
3910 | __m128i __b) { | |||
3911 | struct __storeu_si64 { | |||
3912 | long long __v; | |||
3913 | } __attribute__((__packed__, __may_alias__)); | |||
3914 | ((struct __storeu_si64 *)__p)->__v = ((__v2di)__b)[0]; | |||
3915 | } | |||
3916 | ||||
3917 | /// Stores a 32-bit integer value from the low element of a 128-bit integer | |||
3918 | /// vector. | |||
3919 | /// | |||
3920 | /// \headerfile <x86intrin.h> | |||
3921 | /// | |||
3922 | /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. | |||
3923 | /// | |||
3924 | /// \param __p | |||
3925 | /// A pointer to a 32-bit memory location. The address of the memory | |||
3926 | /// location does not have to be aligned. | |||
3927 | /// \param __b | |||
3928 | /// A 128-bit integer vector containing the value to be stored. | |||
3929 | static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si32(void *__p, | |||
3930 | __m128i __b) { | |||
3931 | struct __storeu_si32 { | |||
3932 | int __v; | |||
3933 | } __attribute__((__packed__, __may_alias__)); | |||
3934 | ((struct __storeu_si32 *)__p)->__v = ((__v4si)__b)[0]; | |||
3935 | } | |||
3936 | ||||
3937 | /// Stores a 16-bit integer value from the low element of a 128-bit integer | |||
3938 | /// vector. | |||
3939 | /// | |||
3940 | /// \headerfile <x86intrin.h> | |||
3941 | /// | |||
3942 | /// This intrinsic does not correspond to a specific instruction. | |||
3943 | /// | |||
3944 | /// \param __p | |||
3945 | /// A pointer to a 16-bit memory location. The address of the memory | |||
3946 | /// location does not have to be aligned. | |||
3947 | /// \param __b | |||
3948 | /// A 128-bit integer vector containing the value to be stored. | |||
3949 | static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si16(void *__p, | |||
3950 | __m128i __b) { | |||
3951 | struct __storeu_si16 { | |||
3952 | short __v; | |||
3953 | } __attribute__((__packed__, __may_alias__)); | |||
3954 | ((struct __storeu_si16 *)__p)->__v = ((__v8hi)__b)[0]; | |||
3955 | } | |||
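/* Illustrative: the _mm_storeu_si64/si32/si16 family writes only the low
 * element of the vector, which is handy for the scalar tail of a SIMD loop.
 * dst must have room for at least 14 bytes; names are hypothetical. */
static inline void example_partial_stores(__m128i v, unsigned char *dst) {
  _mm_storeu_si64(dst, v);      /* low 8 bytes of v -> dst[0..7]   */
  _mm_storeu_si32(dst + 8, v);  /* low 4 bytes of v -> dst[8..11]  */
  _mm_storeu_si16(dst + 12, v); /* low 2 bytes of v -> dst[12..13] */
}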
3956 | ||||
3957 | /// Moves bytes selected by the mask from the first operand to the | |||
3958 | /// specified unaligned memory location. When a mask bit is 1, the | |||
3959 | /// corresponding byte is written, otherwise it is not written. | |||
3960 | /// | |||
3961 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be | |||
3962 | /// used again soon). Exception and trap behavior for elements not selected | |||
3963 | /// for storage to memory are implementation dependent. | |||
3964 | /// | |||
3965 | /// \headerfile <x86intrin.h> | |||
3966 | /// | |||
3967 | /// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> | |||
3968 | /// instruction. | |||
3969 | /// | |||
3970 | /// \param __d | |||
3971 | /// A 128-bit integer vector containing the values to be moved. | |||
3972 | /// \param __n | |||
3973 | /// A 128-bit integer vector containing the mask. The most significant bit of | |||
3974 | /// each byte is the mask bit for the corresponding byte of \a __d. | |||
3975 | /// \param __p | |||
3976 | /// A pointer to an unaligned 128-bit memory location where the specified | |||
3977 | /// values are moved. | |||
3978 | static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, | |||
3979 | __m128i __n, | |||
3980 | char *__p) { | |||
3981 | __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); | |||
3982 | } | |||
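/* Illustrative sketch: store only the even-indexed bytes of d. Since only
 * the most significant bit of each mask byte is consulted, 0x80 selects a
 * byte and 0x00 leaves the destination byte untouched. */
static inline void example_maskmove_even_bytes(__m128i d, char *dst) {
  __m128i mask = _mm_set1_epi16(0x0080); /* MSB set in bytes 0, 2, 4, ... */
  _mm_maskmoveu_si128(d, mask, dst);     /* dst need not be aligned */
}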
3983 | ||||
3984 | /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to | |||
3985 | /// a memory location. | |||
3986 | /// | |||
3987 | /// \headerfile <x86intrin.h> | |||
3988 | /// | |||
3989 | /// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction. | |||
3990 | /// | |||
3991 | /// \param __p | |||
3992 | /// A pointer to a 64-bit memory location that will receive the lower 64 bits | |||
3993 | /// of the integer vector parameter. | |||
3994 | /// \param __a | |||
3995 | /// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the | |||
3996 | /// value to be stored. | |||
3997 | static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, | |||
3998 | __m128i __a) { | |||
3999 | struct __mm_storel_epi64_struct { | |||
4000 | long long __u; | |||
4001 | } __attribute__((__packed__, __may_alias__)); | |||
4002 | ((struct __mm_storel_epi64_struct *)__p)->__u = __a[0]; | |||
4003 | } | |||
4004 | ||||
4005 | /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit | |||
4006 | /// aligned memory location. | |||
4007 | /// | |||
4008 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be | |||
4009 | /// used again soon). | |||
4010 | /// | |||
4011 | /// \headerfile <x86intrin.h> | |||
4012 | /// | |||
4013 | /// This intrinsic corresponds to the <c> VMOVNTPD / MOVNTPD </c> instruction. | |||
4014 | /// | |||
4015 | /// \param __p | |||
4016 | /// A pointer to the 128-bit aligned memory location used to store the value. | |||
4017 | /// \param __a | |||
4018 | /// A vector of [2 x double] containing the 64-bit values to be stored. | |||
4019 | static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, | |||
4020 | __m128d __a) { | |||
4021 | __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); | |||
4022 | } | |||
4023 | ||||
4024 | /// Stores a 128-bit integer vector to a 128-bit aligned memory location. | |||
4025 | /// | |||
4026 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be | |||
4027 | /// used again soon). | |||
4028 | /// | |||
4029 | /// \headerfile <x86intrin.h> | |||
4030 | /// | |||
4031 | /// This intrinsic corresponds to the <c> VMOVNTDQ / MOVNTDQ </c> instruction. | |||
4032 | /// | |||
4033 | /// \param __p | |||
4034 | /// A pointer to the 128-bit aligned memory location used to store the value. | |||
4035 | /// \param __a | |||
4036 | /// A 128-bit integer vector containing the values to be stored. | |||
4037 | static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, | |||
4038 | __m128i __a) { | |||
4039 | __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); | |||
4040 | } | |||
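/* Illustrative pattern: a run of non-temporal stores is normally followed
 * by a fence before the data is handed to another thread or device. The
 * _mm_mfence declared below would serve, though the lighter _mm_sfence from
 * <xmmintrin.h> is the usual choice. dst is assumed 16-byte aligned. */
static inline void example_stream_fill(__m128i v, __m128i *dst, int n) {
  for (int i = 0; i < n; ++i)
    _mm_stream_si128(dst + i, v); /* bypasses the cache hierarchy    */
  _mm_mfence();                   /* make the stores globally visible */
}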
4041 | ||||
4042 | /// Stores a 32-bit integer value in the specified memory location. | |||
4043 | /// | |||
4044 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be | |||
4045 | /// used again soon). | |||
4046 | /// | |||
4047 | /// \headerfile <x86intrin.h> | |||
4048 | /// | |||
4049 | /// This intrinsic corresponds to the <c> MOVNTI </c> instruction. | |||
4050 | /// | |||
4051 | /// \param __p | |||
4052 | /// A pointer to the 32-bit memory location used to store the value. | |||
4053 | /// \param __a | |||
4054 | /// A 32-bit integer containing the value to be stored. | |||
4055 | static __inline__ void | |||
4056 | __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) | |||
4057 | _mm_stream_si32(void *__p, int __a) { | |||
4058 | __builtin_ia32_movnti((int *)__p, __a); | |||
4059 | } | |||
4060 | ||||
4061 | #ifdef __x86_64__ | |||
4062 | /// Stores a 64-bit integer value in the specified memory location. | |||
4063 | /// | |||
4064 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be | |||
4065 | /// used again soon). | |||
4066 | /// | |||
4067 | /// \headerfile <x86intrin.h> | |||
4068 | /// | |||
4069 | /// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction. | |||
4070 | /// | |||
4071 | /// \param __p | |||
4072 | /// A pointer to the 64-bit memory location used to store the value. | |||
4073 | /// \param __a | |||
4074 | /// A 64-bit integer containing the value to be stored. | |||
4075 | static __inline__ void | |||
4076 | __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) | |||
4077 | _mm_stream_si64(void *__p, long long __a) { | |||
4078 | __builtin_ia32_movnti64((long long *)__p, __a); | |||
4079 | } | |||
4080 | #endif | |||
4081 | ||||
4082 | #if defined(__cplusplus) | |||
4083 | extern "C" { | |||
4084 | #endif | |||
4085 | ||||
4086 | /// The cache line containing \a __p is flushed and invalidated from all | |||
4087 | /// caches in the coherency domain. | |||
4088 | /// | |||
4089 | /// \headerfile <x86intrin.h> | |||
4090 | /// | |||
4091 | /// This intrinsic corresponds to the <c> CLFLUSH </c> instruction. | |||
4092 | /// | |||
4093 | /// \param __p | |||
4094 | /// A pointer to the memory location used to identify the cache line to be | |||
4095 | /// flushed. | |||
4096 | void _mm_clflush(void const *__p); | |||
4097 | ||||
4098 | /// Forces strong memory ordering (serialization) between load | |||
4099 | /// instructions preceding this instruction and load instructions following | |||
4100 | /// this instruction, ensuring the system completes all previous loads before | |||
4101 | /// executing subsequent loads. | |||
4102 | /// | |||
4103 | /// \headerfile <x86intrin.h> | |||
4104 | /// | |||
4105 | /// This intrinsic corresponds to the <c> LFENCE </c> instruction. | |||
4106 | /// | |||
4107 | void _mm_lfence(void); | |||
4108 | ||||
4109 | /// Forces strong memory ordering (serialization) between load and store | |||
4110 | /// instructions preceding this instruction and load and store instructions | |||
4111 | /// following this instruction, ensuring that the system completes all | |||
4112 | /// previous memory accesses before executing subsequent memory accesses. | |||
4113 | /// | |||
4114 | /// \headerfile <x86intrin.h> | |||
4115 | /// | |||
4116 | /// This intrinsic corresponds to the <c> MFENCE </c> instruction. | |||
4117 | /// | |||
4118 | void _mm_mfence(void); | |||
4119 | ||||
4120 | #if defined(__cplusplus) | |||
4121 | } // extern "C" | |||
4122 | #endif | |||
4123 | ||||
4124 | /// Converts, with saturation, 16-bit signed integers from both 128-bit integer | |||
4125 | /// vector operands into 8-bit signed integers, and packs the results into | |||
4126 | /// the destination. | |||
4127 | /// | |||
4128 | /// Positive values greater than 0x7F are saturated to 0x7F. Negative values | |||
4129 | /// less than -0x80 are saturated to -0x80 (byte pattern 0x80). | |||
4130 | /// | |||
4131 | /// \headerfile <x86intrin.h> | |||
4132 | /// | |||
4133 | /// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction. | |||
4134 | /// | |||
4135 | /// \param __a | |||
4136 | /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are | |||
4137 | /// written to the lower 64 bits of the result. | |||
4138 | /// \param __b | |||
4139 | /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are | |||
4140 | /// written to the higher 64 bits of the result. | |||
4141 | /// \returns A 128-bit vector of [16 x i8] containing the converted values. | |||
4142 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, | |||
4143 | __m128i __b) { | |||
4144 | return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); | |||
4145 | } | |||
4146 | ||||
4147 | /// Converts, with saturation, 32-bit signed integers from both 128-bit integer | |||
4148 | /// vector operands into 16-bit signed integers, and packs the results into | |||
4149 | /// the destination. | |||
4150 | /// | |||
4151 | /// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative | |||
4152 | /// values less than -0x8000 are saturated to -0x8000 (bit pattern 0x8000). | |||
4153 | /// | |||
4154 | /// \headerfile <x86intrin.h> | |||
4155 | /// | |||
4156 | /// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction. | |||
4157 | /// | |||
4158 | /// \param __a | |||
4159 | /// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values | |||
4160 | /// are written to the lower 64 bits of the result. | |||
4161 | /// \param __b | |||
4162 | /// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values | |||
4163 | /// are written to the higher 64 bits of the result. | |||
4164 | /// \returns A 128-bit vector of [8 x i16] containing the converted values. | |||
4165 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, | |||
4166 | __m128i __b) { | |||
4167 | return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); | |||
4168 | } | |||
4169 | ||||
4170 | /// Converts, with saturation, 16-bit signed integers from both 128-bit integer | |||
4171 | /// vector operands into 8-bit unsigned integers, and packs the results into | |||
4172 | /// the destination. | |||
4173 | /// | |||
4174 | /// Values greater than 0xFF are saturated to 0xFF. Values less than 0x00 | |||
4175 | /// are saturated to 0x00. | |||
4176 | /// | |||
4177 | /// \headerfile <x86intrin.h> | |||
4178 | /// | |||
4179 | /// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction. | |||
4180 | /// | |||
4181 | /// \param __a | |||
4182 | /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are | |||
4183 | /// written to the lower 64 bits of the result. | |||
4184 | /// \param __b | |||
4185 | /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are | |||
4186 | /// written to the higher 64 bits of the result. | |||
4187 | /// \returns A 128-bit vector of [16 x i8] containing the converted values. | |||
4188 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, | |||
4189 | __m128i __b) { | |||
4190 | return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); | |||
4191 | } | |||
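/* Illustrative saturation behavior: packing signed 16-bit lanes into
 * unsigned bytes clamps every value to the [0x00, 0xFF] range. */
static inline __m128i example_packus_saturation(void) {
  __m128i lo = _mm_set1_epi16(-7);  /* below 0x00: saturates to 0x00 */
  __m128i hi = _mm_set1_epi16(300); /* above 0xFF: saturates to 0xFF */
  return _mm_packus_epi16(lo, hi);  /* low 8 bytes 0x00, high 8 bytes 0xFF */
}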
4192 | ||||
4193 | /// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using | |||
4194 | /// the immediate-value parameter as a selector. | |||
4195 | /// | |||
4196 | /// \headerfile <x86intrin.h> | |||
4197 | /// | |||
4198 | /// \code | |||
4199 | /// int _mm_extract_epi16(__m128i a, const int imm); | |||
4200 | /// \endcode | |||
4201 | /// | |||
4202 | /// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction. | |||
4203 | /// | |||
4204 | /// \param a | |||
4205 | /// A 128-bit integer vector. | |||
4206 | /// \param imm | |||
4207 | /// An immediate value. Bits [2:0] selects values from \a a to be assigned | |||
4208 | /// to bits[15:0] of the result. \n | |||
4209 | /// 000: assign values from bits [15:0] of \a a. \n | |||
4210 | /// 001: assign values from bits [31:16] of \a a. \n | |||
4211 | /// 010: assign values from bits [47:32] of \a a. \n | |||
4212 | /// 011: assign values from bits [63:48] of \a a. \n | |||
4213 | /// 100: assign values from bits [79:64] of \a a. \n | |||
4214 | /// 101: assign values from bits [95:80] of \a a. \n | |||
4215 | /// 110: assign values from bits [111:96] of \a a. \n | |||
4216 | /// 111: assign values from bits [127:112] of \a a. | |||
4217 | /// \returns An integer, whose lower 16 bits are selected from the 128-bit | |||
4218 | /// integer vector parameter and the remaining bits are assigned zeros. | |||
4219 | #define _mm_extract_epi16(a, imm) \ | |||
4220 | ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \ | |||
4221 | (int)(imm))) | |||
4222 | ||||
4223 | /// Constructs a 128-bit integer vector by first making a copy of the | |||
4224 | /// 128-bit integer vector parameter, and then inserting the lower 16 bits | |||
4225 | /// of an integer parameter into an offset specified by the immediate-value | |||
4226 | /// parameter. | |||
4227 | /// | |||
4228 | /// \headerfile <x86intrin.h> | |||
4229 | /// | |||
4230 | /// \code | |||
4231 | /// __m128i _mm_insert_epi16(__m128i a, int b, const int imm); | |||
4232 | /// \endcode | |||
4233 | /// | |||
4234 | /// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction. | |||
4235 | /// | |||
4236 | /// \param a | |||
4237 | /// A 128-bit integer vector of [8 x i16]. This vector is copied to the | |||
4238 | /// result and then one of the eight elements in the result is replaced by | |||
4239 | /// the lower 16 bits of \a b. | |||
4240 | /// \param b | |||
4241 | /// An integer. The lower 16 bits of this parameter are written to the | |||
4242 | /// result beginning at an offset specified by \a imm. | |||
4243 | /// \param imm | |||
4244 | /// An immediate value specifying the bit offset in the result at which the | |||
4245 | /// lower 16 bits of \a b are written. | |||
4246 | /// \returns A 128-bit integer vector containing the constructed values. | |||
4247 | #define _mm_insert_epi16(a, b, imm) \ | |||
4248 | ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \ | |||
4249 | (int)(imm))) | |||
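/* Illustrative round trip with the two macros above. The selector must be
 * a compile-time constant in [0, 7]; the helper name is hypothetical. */
static inline int example_extract_then_insert(__m128i v) {
  int old = _mm_extract_epi16(v, 3);          /* lane 3, zero-extended */
  __m128i u = _mm_insert_epi16(v, 0x1234, 3); /* replace lane 3        */
  (void)u;                                    /* other lanes unchanged */
  return old;
}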
4250 | ||||
4251 | /// Copies the values of the most significant bits from each 8-bit | |||
4252 | /// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask | |||
4253 | /// value, zero-extends the value, and writes it to the destination. | |||
4254 | /// | |||
4255 | /// \headerfile <x86intrin.h> | |||
4256 | /// | |||
4257 | /// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction. | |||
4258 | /// | |||
4259 | /// \param __a | |||
4260 | /// A 128-bit integer vector containing the values with bits to be extracted. | |||
4261 | /// \returns The most significant bits from each 8-bit element in \a __a, | |||
4262 | /// written to bits [15:0]. The other bits are assigned zeros. | |||
4263 | static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { | |||
4264 | return __builtin_ia32_pmovmskb128((__v16qi)__a); | |||
4265 | } | |||
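/* Common idiom (illustrative): combine a bytewise compare with the 16-bit
 * movemask result to test whether every byte lane matched. */
static inline int example_all_bytes_equal(__m128i a, __m128i b) {
  __m128i eq = _mm_cmpeq_epi8(a, b);      /* 0xFF where bytes match   */
  return _mm_movemask_epi8(eq) == 0xFFFF; /* 1 iff all 16 lanes match */
}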
4266 | ||||
4267 | /// Constructs a 128-bit integer vector by shuffling four 32-bit | |||
4268 | /// elements of a 128-bit integer vector parameter, using the immediate-value | |||
4269 | /// parameter as a specifier. | |||
4270 | /// | |||
4271 | /// \headerfile <x86intrin.h> | |||
4272 | /// | |||
4273 | /// \code | |||
4274 | /// __m128i _mm_shuffle_epi32(__m128i a, const int imm); | |||
4275 | /// \endcode | |||
4276 | /// | |||
4277 | /// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction. | |||
4278 | /// | |||
4279 | /// \param a | |||
4280 | /// A 128-bit integer vector containing the values to be copied. | |||
4281 | /// \param imm | |||
4282 | /// An immediate value containing an 8-bit value specifying which elements to | |||
4283 | /// copy from a. The destinations within the 128-bit destination are assigned | |||
4284 | /// values as follows: \n | |||
4285 | /// Bits [1:0] are used to assign values to bits [31:0] of the result. \n | |||
4286 | /// Bits [3:2] are used to assign values to bits [63:32] of the result. \n | |||
4287 | /// Bits [5:4] are used to assign values to bits [95:64] of the result. \n | |||
4288 | /// Bits [7:6] are used to assign values to bits [127:96] of the result. \n | |||
4289 | /// Bit value assignments: \n | |||
4290 | /// 00: assign values from bits [31:0] of \a a. \n | |||
4291 | /// 01: assign values from bits [63:32] of \a a. \n | |||
4292 | /// 10: assign values from bits [95:64] of \a a. \n | |||
4293 | /// 11: assign values from bits [127:96] of \a a. \n | |||
4294 | /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. | |||
4295 | /// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form | |||
4296 | /// <c>[b6, b4, b2, b0]</c>. | |||
4297 | /// \returns A 128-bit integer vector containing the shuffled values. | |||
4298 | #define _mm_shuffle_epi32(a, imm) \ | |||
4299 | ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))) | |||
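/* Illustrative: _MM_SHUFFLE(3, 2, 1, 0) is the identity permutation, so
 * reversing the index order reverses the four 32-bit elements. */
static inline __m128i example_reverse_epi32(__m128i v) {
  return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3)); /* lanes 3..0 -> 0..3 */
}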
4300 | ||||
4301 | /// Constructs a 128-bit integer vector by shuffling four lower 16-bit | |||
4302 | /// elements of a 128-bit integer vector of [8 x i16], using the immediate | |||
4303 | /// value parameter as a specifier. | |||
4304 | /// | |||
4305 | /// \headerfile <x86intrin.h> | |||
4306 | /// | |||
4307 | /// \code | |||
4308 | /// __m128i _mm_shufflelo_epi16(__m128i a, const int imm); | |||
4309 | /// \endcode | |||
4310 | /// | |||
4311 | /// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction. | |||
4312 | /// | |||
4313 | /// \param a | |||
4314 | /// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits | |||
4315 | /// [127:64] of the result. | |||
4316 | /// \param imm | |||
4317 | /// An 8-bit immediate value specifying which elements to copy from \a a. \n | |||
4318 | /// Bits[1:0] are used to assign values to bits [15:0] of the result. \n | |||
4319 | /// Bits[3:2] are used to assign values to bits [31:16] of the result. \n | |||
4320 | /// Bits[5:4] are used to assign values to bits [47:32] of the result. \n | |||
4321 | /// Bits[7:6] are used to assign values to bits [63:48] of the result. \n | |||
4322 | /// Bit value assignments: \n | |||
4323 | /// 00: assign values from bits [15:0] of \a a. \n | |||
4324 | /// 01: assign values from bits [31:16] of \a a. \n | |||
4325 | /// 10: assign values from bits [47:32] of \a a. \n | |||
4326 | /// 11: assign values from bits [63:48] of \a a. \n | |||
4327 | /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. | |||
4328 | /// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form | |||
4329 | /// <c>[b6, b4, b2, b0]</c>. | |||
4330 | /// \returns A 128-bit integer vector containing the shuffled values. | |||
4331 | #define _mm_shufflelo_epi16(a, imm) \ | |||
4332 | ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))) | |||
4333 | ||||
4334 | /// Constructs a 128-bit integer vector by shuffling four upper 16-bit | |||
4335 | /// elements of a 128-bit integer vector of [8 x i16], using the immediate | |||
4336 | /// value parameter as a specifier. | |||
4337 | /// | |||
4338 | /// \headerfile <x86intrin.h> | |||
4339 | /// | |||
4340 | /// \code | |||
4341 | /// __m128i _mm_shufflehi_epi16(__m128i a, const int imm); | |||
4342 | /// \endcode | |||
4343 | /// | |||
4344 | /// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction. | |||
4345 | /// | |||
4346 | /// \param a | |||
4347 | /// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits | |||
4348 | /// [63:0] of the result. | |||
4349 | /// \param imm | |||
4350 | /// An 8-bit immediate value specifying which elements to copy from \a a. \n | |||
4351 | /// Bits[1:0] are used to assign values to bits [79:64] of the result. \n | |||
4352 | /// Bits[3:2] are used to assign values to bits [95:80] of the result. \n | |||
4353 | /// Bits[5:4] are used to assign values to bits [111:96] of the result. \n | |||
4354 | /// Bits[7:6] are used to assign values to bits [127:112] of the result. \n | |||
4355 | /// Bit value assignments: \n | |||
4356 | /// 00: assign values from bits [79:64] of \a a. \n | |||
4357 | /// 01: assign values from bits [95:80] of \a a. \n | |||
4358 | /// 10: assign values from bits [111:96] of \a a. \n | |||
4359 | /// 11: assign values from bits [127:112] of \a a. \n | |||
4360 | /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. | |||
4361 | /// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form | |||
4362 | /// <c>[b6, b4, b2, b0]</c>. | |||
4363 | /// \returns A 128-bit integer vector containing the shuffled values. | |||
4364 | #define _mm_shufflehi_epi16(a, imm) \ | |||
4365 | ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))) | |||
4366 | ||||
4367 | /// Unpacks the high-order (index 8-15) values from two 128-bit vectors | |||
4368 | /// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. | |||
4369 | /// | |||
4370 | /// \headerfile <x86intrin.h> | |||
4371 | /// | |||
4372 | /// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c> | |||
4373 | /// instruction. | |||
4374 | /// | |||
4375 | /// \param __a | |||
4376 | /// A 128-bit vector of [16 x i8]. | |||
4377 | /// Bits [71:64] are written to bits [7:0] of the result. \n | |||
4378 | /// Bits [79:72] are written to bits [23:16] of the result. \n | |||
4379 | /// Bits [87:80] are written to bits [39:32] of the result. \n | |||
4380 | /// Bits [95:88] are written to bits [55:48] of the result. \n | |||
4381 | /// Bits [103:96] are written to bits [71:64] of the result. \n | |||
4382 | /// Bits [111:104] are written to bits [87:80] of the result. \n | |||
4383 | /// Bits [119:112] are written to bits [103:96] of the result. \n | |||
4384 | /// Bits [127:120] are written to bits [119:112] of the result. | |||
4385 | /// \param __b | |||
4386 | /// A 128-bit vector of [16 x i8]. \n | |||
4387 | /// Bits [71:64] are written to bits [15:8] of the result. \n | |||
4388 | /// Bits [79:72] are written to bits [31:24] of the result. \n | |||
4389 | /// Bits [87:80] are written to bits [47:40] of the result. \n | |||
4390 | /// Bits [95:88] are written to bits [63:56] of the result. \n | |||
4391 | /// Bits [103:96] are written to bits [79:72] of the result. \n | |||
4392 | /// Bits [111:104] are written to bits [95:88] of the result. \n | |||
4393 | /// Bits [119:112] are written to bits [111:104] of the result. \n | |||
4394 | /// Bits [127:120] are written to bits [127:120] of the result. | |||
4395 | /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. | |||
4396 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, | |||
4397 | __m128i __b) { | |||
4398 | return (__m128i)__builtin_shufflevector( | |||
4399 | (__v16qi)__a, (__v16qi)__b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11, | |||
4400 | 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15); | |||
4401 | } | |||
4402 | ||||
4403 | /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of | |||
4404 | /// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16]. | |||
4405 | /// | |||
4406 | /// \headerfile <x86intrin.h> | |||
4407 | /// | |||
4408 | /// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> | |||
4409 | /// instruction. | |||
4410 | /// | |||
4411 | /// \param __a | |||
4412 | /// A 128-bit vector of [8 x i16]. | |||
4413 | /// Bits [79:64] are written to bits [15:0] of the result. \n | |||
4414 | /// Bits [95:80] are written to bits [47:32] of the result. \n | |||
4415 | /// Bits [111:96] are written to bits [79:64] of the result. \n | |||
4416 | /// Bits [127:112] are written to bits [111:96] of the result. | |||
4417 | /// \param __b | |||
4418 | /// A 128-bit vector of [8 x i16]. | |||
4419 | /// Bits [79:64] are written to bits [31:16] of the result. \n | |||
4420 | /// Bits [95:80] are written to bits [63:48] of the result. \n | |||
4421 | /// Bits [111:96] are written to bits [95:80] of the result. \n | |||
4422 | /// Bits [127:112] are written to bits [127:112] of the result. | |||
4423 | /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. | |||
4424 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, | |||
4425 | __m128i __b) { | |||
4426 | return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8 + 4, 5, | |||
4427 | 8 + 5, 6, 8 + 6, 7, 8 + 7); | |||
4428 | } | |||
4429 | ||||
4430 | /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of | |||
4431 | /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. | |||
4432 | /// | |||
4433 | /// \headerfile <x86intrin.h> | |||
4434 | /// | |||
4435 | /// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> | |||
4436 | /// instruction. | |||
4437 | /// | |||
4438 | /// \param __a | |||
4439 | /// A 128-bit vector of [4 x i32]. \n | |||
4440 | /// Bits [95:64] are written to bits [31:0] of the destination. \n | |||
4441 | /// Bits [127:96] are written to bits [95:64] of the destination. | |||
4442 | /// \param __b | |||
4443 | /// A 128-bit vector of [4 x i32]. \n | |||
4444 | /// Bits [95:64] are written to bits [63:32] of the destination. \n | |||
4445 | /// Bits [127:96] are written to bits [127:96] of the destination. | |||
4446 | /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. | |||
4447 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, | |||
4448 | __m128i __b) { | |||
4449 | return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3, | |||
4450 | 4 + 3); | |||
4451 | } | |||
4452 | ||||
4453 | /// Unpacks the high-order 64-bit elements from two 128-bit vectors of | |||
4454 | /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. | |||
4455 | /// | |||
4456 | /// \headerfile <x86intrin.h> | |||
4457 | /// | |||
4458 | /// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> | |||
4459 | /// instruction. | |||
4460 | /// | |||
4461 | /// \param __a | |||
4462 | /// A 128-bit vector of [2 x i64]. \n | |||
4463 | /// Bits [127:64] are written to bits [63:0] of the destination. | |||
4464 | /// \param __b | |||
4465 | /// A 128-bit vector of [2 x i64]. \n | |||
4466 | /// Bits [127:64] are written to bits [127:64] of the destination. | |||
4467 | /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. | |||
4468 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, | |||
4469 | __m128i __b) { | |||
4470 | return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1); | |||
4471 | } | |||
4472 | ||||
4473 | /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of | |||
4474 | /// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. | |||
4475 | /// | |||
4476 | /// \headerfile <x86intrin.h> | |||
4477 | /// | |||
4478 | /// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c> | |||
4479 | /// instruction. | |||
4480 | /// | |||
4481 | /// \param __a | |||
4482 | /// A 128-bit vector of [16 x i8]. \n | |||
4483 | /// Bits [7:0] are written to bits [7:0] of the result. \n | |||
4484 | /// Bits [15:8] are written to bits [23:16] of the result. \n | |||
4485 | /// Bits [23:16] are written to bits [39:32] of the result. \n | |||
4486 | /// Bits [31:24] are written to bits [55:48] of the result. \n | |||
4487 | /// Bits [39:32] are written to bits [71:64] of the result. \n | |||
4488 | /// Bits [47:40] are written to bits [87:80] of the result. \n | |||
4489 | /// Bits [55:48] are written to bits [103:96] of the result. \n | |||
4490 | /// Bits [63:56] are written to bits [119:112] of the result. | |||
4491 | /// \param __b | |||
4492 | /// A 128-bit vector of [16 x i8]. | |||
4493 | /// Bits [7:0] are written to bits [15:8] of the result. \n | |||
4494 | /// Bits [15:8] are written to bits [31:24] of the result. \n | |||
4495 | /// Bits [23:16] are written to bits [47:40] of the result. \n | |||
4496 | /// Bits [31:24] are written to bits [63:56] of the result. \n | |||
4497 | /// Bits [39:32] are written to bits [79:72] of the result. \n | |||
4498 | /// Bits [47:40] are written to bits [95:88] of the result. \n | |||
4499 | /// Bits [55:48] are written to bits [111:104] of the result. \n | |||
4500 | /// Bits [63:56] are written to bits [127:120] of the result. | |||
4501 | /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. | |||
4502 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, | |||
4503 | __m128i __b) { | |||
4504 | return (__m128i)__builtin_shufflevector( | |||
4505 | (__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, | |||
4506 | 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7); | |||
4507 | } | |||
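/* Widely used widening idiom (illustrative): interleaving with a zero
 * vector zero-extends each unsigned byte to 16 bits; unpacklo covers bytes
 * 0-7 and the unpackhi variant above covers bytes 8-15. */
static inline void example_widen_u8_to_u16(__m128i v, __m128i *lo, __m128i *hi) {
  __m128i zero = _mm_setzero_si128();
  *lo = _mm_unpacklo_epi8(v, zero); /* bytes 0-7  -> eight 16-bit lanes */
  *hi = _mm_unpackhi_epi8(v, zero); /* bytes 8-15 -> eight 16-bit lanes */
}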
4508 | ||||
4509 | /// Unpacks the low-order (index 0-3) values from each of the two 128-bit | |||
4510 | /// vectors of [8 x i16] and interleaves them into a 128-bit vector of | |||
4511 | /// [8 x i16]. | |||
4512 | /// | |||
4513 | /// \headerfile <x86intrin.h> | |||
4514 | /// | |||
4515 | /// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> | |||
4516 | /// instruction. | |||
4517 | /// | |||
4518 | /// \param __a | |||
4519 | /// A 128-bit vector of [8 x i16]. | |||
4520 | /// Bits [15:0] are written to bits [15:0] of the result. \n | |||
4521 | /// Bits [31:16] are written to bits [47:32] of the result. \n | |||
4522 | /// Bits [47:32] are written to bits [79:64] of the result. \n | |||
4523 | /// Bits [63:48] are written to bits [111:96] of the result. | |||
4524 | /// \param __b | |||
4525 | /// A 128-bit vector of [8 x i16]. | |||
4526 | /// Bits [15:0] are written to bits [31:16] of the result. \n | |||
4527 | /// Bits [31:16] are written to bits [63:48] of the result. \n | |||
4528 | /// Bits [47:32] are written to bits [95:80] of the result. \n | |||
4529 | /// Bits [63:48] are written to bits [127:112] of the result. | |||
4530 | /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. | |||
4531 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, | |||
4532 | __m128i __b) { | |||
4533 | return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8 + 0, 1, | |||
4534 | 8 + 1, 2, 8 + 2, 3, 8 + 3); | |||
4535 | } | |||
4536 | ||||
4537 | /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of | |||
4538 | /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. | |||
4539 | /// | |||
4540 | /// \headerfile <x86intrin.h> | |||
4541 | /// | |||
4542 | /// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> | |||
4543 | /// instruction. | |||
4544 | /// | |||
4545 | /// \param __a | |||
4546 | /// A 128-bit vector of [4 x i32]. \n | |||
4547 | /// Bits [31:0] are written to bits [31:0] of the destination. \n | |||
4548 | /// Bits [63:32] are written to bits [95:64] of the destination. | |||
4549 | /// \param __b | |||
4550 | /// A 128-bit vector of [4 x i32]. \n | |||
4551 | /// Bits [31:0] are written to bits [63:32] of the destination. \n | |||
4552 | /// Bits [63:32] are written to bits [127:96] of the destination. | |||
4553 | /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. | |||
4554 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, | |||
4555 | __m128i __b) { | |||
4556 | return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4 + 0, 1, | |||
4557 | 4 + 1); | |||
4558 | } | |||
4559 | ||||
4560 | /// Unpacks the low-order 64-bit elements from two 128-bit vectors of | |||
4561 | /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. | |||
4562 | /// | |||
4563 | /// \headerfile <x86intrin.h> | |||
4564 | /// | |||
4565 | /// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> | |||
4566 | /// instruction. | |||
4567 | /// | |||
4568 | /// \param __a | |||
4569 | /// A 128-bit vector of [2 x i64]. \n | |||
4570 | /// Bits [63:0] are written to bits [63:0] of the destination. \n | |||
4571 | /// \param __b | |||
4572 | /// A 128-bit vector of [2 x i64]. \n | |||
4573 | /// Bits [63:0] are written to bits [127:64] of the destination. \n | |||
4574 | /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. | |||
4575 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, | |||
4576 | __m128i __b) { | |||
4577 | return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0); | |||
4578 | } | |||
4579 | ||||
4580 | /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit | |||
4581 | /// integer. | |||
4582 | /// | |||
4583 | /// \headerfile <x86intrin.h> | |||
4584 | /// | |||
4585 | /// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction. | |||
4586 | /// | |||
4587 | /// \param __a | |||
4588 | /// A 128-bit integer vector operand. The lower 64 bits are moved to the | |||
4589 | /// destination. | |||
4590 | /// \returns A 64-bit integer containing the lower 64 bits of the parameter. | |||
4591 | static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a) { | |||
4592 | return (__m64)__a[0]; | |||
4593 | } | |||
4594 | ||||
4595 | /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the | |||
4596 | /// upper bits. | |||
4597 | /// | |||
4598 | /// \headerfile <x86intrin.h> | |||
4599 | /// | |||
4600 | /// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction. | |||
4601 | /// | |||
4602 | /// \param __a | |||
4603 | /// A 64-bit value. | |||
4604 | /// \returns A 128-bit integer vector. The lower 64 bits contain the value from | |||
4605 | /// the operand. The upper 64 bits are assigned zeros. | |||
4606 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) { | |||
4607 | return __extension__(__m128i)(__v2di){(long long)__a, 0}; | |||
4608 | } | |||
4609 | ||||
4610 | /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit | |||
4611 | /// integer vector, zeroing the upper bits. | |||
4612 | /// | |||
4613 | /// \headerfile <x86intrin.h> | |||
4614 | /// | |||
4615 | /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. | |||
4616 | /// | |||
4617 | /// \param __a | |||
4618 | /// A 128-bit integer vector operand. The lower 64 bits are moved to the | |||
4619 | /// destination. | |||
4620 | /// \returns A 128-bit integer vector. The lower 64 bits contain the value from | |||
4621 | /// the operand. The upper 64 bits are assigned zeros. | |||
4622 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { | |||
4623 | return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); | |||
4624 | } | |||
4625 | ||||
4626 | /// Unpacks the high-order 64-bit elements from two 128-bit vectors of | |||
4627 | /// [2 x double] and interleaves them into a 128-bit vector of [2 x | |||
4628 | /// double]. | |||
4629 | /// | |||
4630 | /// \headerfile <x86intrin.h> | |||
4631 | /// | |||
4632 | /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction. | |||
4633 | /// | |||
4634 | /// \param __a | |||
4635 | /// A 128-bit vector of [2 x double]. \n | |||
4636 | /// Bits [127:64] are written to bits [63:0] of the destination. | |||
4637 | /// \param __b | |||
4638 | /// A 128-bit vector of [2 x double]. \n | |||
4639 | /// Bits [127:64] are written to bits [127:64] of the destination. | |||
4640 | /// \returns A 128-bit vector of [2 x double] containing the interleaved values. | |||
4641 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, | |||
4642 | __m128d __b) { | |||
4643 | return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2 + 1); | |||
4644 | } | |||
4645 | ||||
4646 | /// Unpacks the low-order 64-bit elements from two 128-bit vectors | |||
4647 | /// of [2 x double] and interleaves them into a 128-bit vector of [2 x | |||
4648 | /// double]. | |||
4649 | /// | |||
4650 | /// \headerfile <x86intrin.h> | |||
4651 | /// | |||
4652 | /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. | |||
4653 | /// | |||
4654 | /// \param __a | |||
4655 | /// A 128-bit vector of [2 x double]. \n | |||
4656 | /// Bits [63:0] are written to bits [63:0] of the destination. | |||
4657 | /// \param __b | |||
4658 | /// A 128-bit vector of [2 x double]. \n | |||
4659 | /// Bits [63:0] are written to bits [127:64] of the destination. | |||
4660 | /// \returns A 128-bit vector of [2 x double] containing the interleaved values. | |||
4661 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, | |||
4662 | __m128d __b) { | |||
4663 | return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2 + 0); | |||
4664 | } | |||
4665 | ||||
4666 | /// Extracts the sign bits of the double-precision values in the 128-bit | |||
4667 | /// vector of [2 x double], zero-extends the value, and writes it to the | |||
4668 | /// low-order bits of the destination. | |||
4669 | /// | |||
4670 | /// \headerfile <x86intrin.h> | |||
4671 | /// | |||
4672 | /// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction. | |||
4673 | /// | |||
4674 | /// \param __a | |||
4675 | /// A 128-bit vector of [2 x double] containing the values with sign bits to | |||
4676 | /// be extracted. | |||
4677 | /// \returns The sign bits from each of the double-precision elements in \a __a, | |||
4678 | /// written to bits [1:0]. The remaining bits are assigned values of zero. | |||
4679 | static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { | |||
4680 | return __builtin_ia32_movmskpd((__v2df)__a); | |||
4681 | } | |||
4682 | ||||
4683 | /// Constructs a 128-bit floating-point vector of [2 x double] from two | |||
4684 | /// 128-bit vector parameters of [2 x double], using the immediate-value | |||
4685 | /// parameter as a specifier. | |||
4686 | /// | |||
4687 | /// \headerfile <x86intrin.h> | |||
4688 | /// | |||
4689 | /// \code | |||
4690 | /// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i); | |||
4691 | /// \endcode | |||
4692 | /// | |||
4693 | /// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction. | |||
4694 | /// | |||
4695 | /// \param a | |||
4696 | /// A 128-bit vector of [2 x double]. | |||
4697 | /// \param b | |||
4698 | /// A 128-bit vector of [2 x double]. | |||
4699 | /// \param i | |||
4700 | /// An 8-bit immediate value. The least significant two bits specify which | |||
4701 | /// elements to copy from \a a and \a b: \n | |||
4702 | /// Bit[0] = 0: lower element of \a a copied to lower element of result. \n | |||
4703 | /// Bit[0] = 1: upper element of \a a copied to lower element of result. \n | |||
4704 | /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n | |||
4705 | /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n | |||
4706 | /// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro. | |||
4707 | /// <c>_MM_SHUFFLE2(b1, b0)</c> can create a 2-bit mask of the form | |||
4708 | /// <c>[b1, b0]</c>. | |||
4709 | /// \returns A 128-bit vector of [2 x double] containing the shuffled values. | |||
4710 | #define _mm_shuffle_pd(a, b, i) \ | |||
4711 |   ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ | |||
4712 |                                   (int)(i))) | |||
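/* Illustrative sketch, not part of this header; assumes SSE2. Builds the
   2-bit shuffle mask with _MM_SHUFFLE2 as described above. The function name
   is hypothetical. */
#include <emmintrin.h>

static __m128d shuffle_demo(__m128d a, __m128d b)
{
    /* _MM_SHUFFLE2(0, 1) == 0b01: bit 0 = 1 selects the upper element of a
       for the result's lower lane; bit 1 = 0 selects the lower element of b
       for the result's upper lane. */
    return _mm_shuffle_pd(a, b, _MM_SHUFFLE2(0, 1));
}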
4713 | ||||
4714 | /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit | |||
4715 | /// floating-point vector of [4 x float]. | |||
4716 | /// | |||
4717 | /// \headerfile <x86intrin.h> | |||
4718 | /// | |||
4719 | /// This intrinsic has no corresponding instruction. | |||
4720 | /// | |||
4721 | /// \param __a | |||
4722 | /// A 128-bit floating-point vector of [2 x double]. | |||
4723 | /// \returns A 128-bit floating-point vector of [4 x float] containing the same | |||
4724 | /// bitwise pattern as the parameter. | |||
4725 | static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) { | |||
4726 | return (__m128)__a; | |||
4727 | } | |||
4728 | ||||
4729 | /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit | |||
4730 | /// integer vector. | |||
4731 | /// | |||
4732 | /// \headerfile <x86intrin.h> | |||
4733 | /// | |||
4734 | /// This intrinsic has no corresponding instruction. | |||
4735 | /// | |||
4736 | /// \param __a | |||
4737 | /// A 128-bit floating-point vector of [2 x double]. | |||
4738 | /// \returns A 128-bit integer vector containing the same bitwise pattern as the | |||
4739 | /// parameter. | |||
4740 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a) { | |||
4741 | return (__m128i)__a; | |||
4742 | } | |||
4743 | ||||
4744 | /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit | |||
4745 | /// floating-point vector of [2 x double]. | |||
4746 | /// | |||
4747 | /// \headerfile <x86intrin.h> | |||
4748 | /// | |||
4749 | /// This intrinsic has no corresponding instruction. | |||
4750 | /// | |||
4751 | /// \param __a | |||
4752 | /// A 128-bit floating-point vector of [4 x float]. | |||
4753 | /// \returns A 128-bit floating-point vector of [2 x double] containing the same | |||
4754 | /// bitwise pattern as the parameter. | |||
4755 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a) { | |||
4756 | return (__m128d)__a; | |||
4757 | } | |||
4758 | ||||
4759 | /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit | |||
4760 | /// integer vector. | |||
4761 | /// | |||
4762 | /// \headerfile <x86intrin.h> | |||
4763 | /// | |||
4764 | /// This intrinsic has no corresponding instruction. | |||
4765 | /// | |||
4766 | /// \param __a | |||
4767 | /// A 128-bit floating-point vector of [4 x float]. | |||
4768 | /// \returns A 128-bit integer vector containing the same bitwise pattern as the | |||
4769 | /// parameter. | |||
4770 | static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a) { | |||
4771 | return (__m128i)__a; | |||
4772 | } | |||
4773 | ||||
4774 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector | |||
4775 | /// of [4 x float]. | |||
4776 | /// | |||
4777 | /// \headerfile <x86intrin.h> | |||
4778 | /// | |||
4779 | /// This intrinsic has no corresponding instruction. | |||
4780 | /// | |||
4781 | /// \param __a | |||
4782 | /// A 128-bit integer vector. | |||
4783 | /// \returns A 128-bit floating-point vector of [4 x float] containing the same | |||
4784 | /// bitwise pattern as the parameter. | |||
4785 | static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a) { | |||
4786 | return (__m128)__a; | |||
4787 | } | |||
4788 | ||||
4789 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector | |||
4790 | /// of [2 x double]. | |||
4791 | /// | |||
4792 | /// \headerfile <x86intrin.h> | |||
4793 | /// | |||
4794 | /// This intrinsic has no corresponding instruction. | |||
4795 | /// | |||
4796 | /// \param __a | |||
4797 | /// A 128-bit integer vector. | |||
4798 | /// \returns A 128-bit floating-point vector of [2 x double] containing the same | |||
4799 | /// bitwise pattern as the parameter. | |||
4800 | static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { | |||
4801 | return (__m128d)__a; | |||
4802 | } | |||
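/* Illustrative sketch, not part of this header; assumes SSE2. The cast
   intrinsics are bitwise reinterpretations, so integer masks can be applied
   to doubles, e.g. clearing each sign bit to compute an absolute value. The
   function name is hypothetical. */
#include <emmintrin.h>

static __m128d abs_pd(__m128d v)
{
    /* All bits set except the sign bit of each 64-bit element. */
    __m128i sign_mask = _mm_set1_epi64x(0x7FFFFFFFFFFFFFFFLL);
    return _mm_and_pd(v, _mm_castsi128_pd(sign_mask));
}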
4803 | ||||
4804 | /// Compares each of the corresponding double-precision values of two | |||
4805 | /// 128-bit vectors of [2 x double], using the operation specified by the | |||
4806 | /// immediate integer operand. | |||
4807 | /// | |||
4808 | /// Each comparison returns 0x0 for false or 0xFFFFFFFFFFFFFFFF for true. | |||
4809 | /// If either value in a comparison is NaN, comparisons that are ordered | |||
4810 | /// return false, and comparisons that are unordered return true. | |||
4811 | /// | |||
4812 | /// \headerfile <x86intrin.h> | |||
4813 | /// | |||
4814 | /// \code | |||
4815 | /// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c); | |||
4816 | /// \endcode | |||
4817 | /// | |||
4818 | /// This intrinsic corresponds to the <c> (V)CMPPD </c> instruction. | |||
4819 | /// | |||
4820 | /// \param a | |||
4821 | /// A 128-bit vector of [2 x double]. | |||
4822 | /// \param b | |||
4823 | /// A 128-bit vector of [2 x double]. | |||
4824 | /// \param c | |||
4825 | /// An immediate integer operand, with bits [4:0] specifying which comparison | |||
4826 | /// operation to use: \n | |||
4827 | /// 0x00: Equal (ordered, non-signaling) \n | |||
4828 | /// 0x01: Less-than (ordered, signaling) \n | |||
4829 | /// 0x02: Less-than-or-equal (ordered, signaling) \n | |||
4830 | /// 0x03: Unordered (non-signaling) \n | |||
4831 | /// 0x04: Not-equal (unordered, non-signaling) \n | |||
4832 | /// 0x05: Not-less-than (unordered, signaling) \n | |||
4833 | /// 0x06: Not-less-than-or-equal (unordered, signaling) \n | |||
4834 | /// 0x07: Ordered (non-signaling) \n | |||
4835 | /// \returns A 128-bit vector of [2 x double] containing the comparison results. | |||
4836 | #define _mm_cmp_pd(a, b, c) \ | |||
4837 |   ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ | |||
4838 |                                  (c))) | |||
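/* Illustrative sketch, not part of this header; assumes SSE2. Uses predicate
   0x01 (less-than, ordered, signaling) from the table above and blends the
   results with SSE2 bitwise selects; NaN handling is deliberately ignored.
   The function name is hypothetical. */
#include <emmintrin.h>

static __m128d min_demo(__m128d a, __m128d b)
{
    __m128d lt = _mm_cmp_pd(a, b, 0x01);   /* all-ones lanes where a < b */
    /* Select a where the mask is set, b elsewhere. */
    return _mm_or_pd(_mm_and_pd(lt, a), _mm_andnot_pd(lt, b));
}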
4839 | ||||
4840 | /// Compares each of the corresponding scalar double-precision values of | |||
4841 | /// two 128-bit vectors of [2 x double], using the operation specified by the | |||
4842 | /// immediate integer operand. | |||
4843 | /// | |||
4844 | /// Each comparison returns 0x0 for false or 0xFFFFFFFFFFFFFFFF for true. | |||
4845 | /// If either value in a comparison is NaN, comparisons that are ordered | |||
4846 | /// return false, and comparisons that are unordered return true. | |||
4847 | /// | |||
4848 | /// \headerfile <x86intrin.h> | |||
4849 | /// | |||
4850 | /// \code | |||
4851 | /// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c); | |||
4852 | /// \endcode | |||
4853 | /// | |||
4854 | /// This intrinsic corresponds to the <c> (V)CMPSD </c> instruction. | |||
4855 | /// | |||
4856 | /// \param a | |||
4857 | /// A 128-bit vector of [2 x double]. | |||
4858 | /// \param b | |||
4859 | /// A 128-bit vector of [2 x double]. | |||
4860 | /// \param c | |||
4861 | /// An immediate integer operand, with bits [4:0] specifying which comparison | |||
4862 | /// operation to use: \n | |||
4863 | /// 0x00: Equal (ordered, non-signaling) \n | |||
4864 | /// 0x01: Less-than (ordered, signaling) \n | |||
4865 | /// 0x02: Less-than-or-equal (ordered, signaling) \n | |||
4866 | /// 0x03: Unordered (non-signaling) \n | |||
4867 | /// 0x04: Not-equal (unordered, non-signaling) \n | |||
4868 | /// 0x05: Not-less-than (unordered, signaling) \n | |||
4869 | /// 0x06: Not-less-than-or-equal (unordered, signaling) \n | |||
4870 | /// 0x07: Ordered (non-signaling) \n | |||
4871 | /// \returns A 128-bit vector of [2 x double] containing the comparison results. | |||
4872 | #define _mm_cmp_sd(a, b, c) \ | |||
4873 |   ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ | |||
4874 |                                  (c))) | |||
4875 | ||||
4876 | #if defined(__cplusplus) | |||
4877 | extern "C" { | |||
4878 | #endif | |||
4879 | ||||
4880 | /// Indicates that a spin loop is being executed for the purposes of | |||
4881 | /// optimizing power consumption during the loop. | |||
4882 | /// | |||
4883 | /// \headerfile <x86intrin.h> | |||
4884 | /// | |||
4885 | /// This intrinsic corresponds to the <c> PAUSE </c> instruction. | |||
4886 | /// | |||
4887 | void _mm_pause(void); | |||
4888 | ||||
4889 | #if defined(__cplusplus) | |||
4890 | } // extern "C" | |||
4891 | #endif | |||
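/* Illustrative sketch, not part of this header; assumes C11 atomics. A
   typical spin-wait loop that issues PAUSE while polling a flag; the flag and
   its semantics are hypothetical. */
#include <emmintrin.h>
#include <stdatomic.h>

static void spin_until_ready(const atomic_int *ready)
{
    while (!atomic_load_explicit(ready, memory_order_acquire))
        _mm_pause();   /* hint to the CPU that this is a spin loop */
}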
4892 | #undef __DEFAULT_FN_ATTRS | |||
4893 | #undef __DEFAULT_FN_ATTRS_MMX | |||
4894 | ||||
4895 | #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) | |||
4896 | ||||
4897 | #define _MM_DENORMALS_ZERO_ON (0x0040U) | |||
4898 | #define _MM_DENORMALS_ZERO_OFF (0x0000U) | |||
4899 | ||||
4900 | #define _MM_DENORMALS_ZERO_MASK (0x0040U) | |||
4901 | ||||
4902 | #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) | |||
4903 | #define _MM_SET_DENORMALS_ZERO_MODE(x) \ | |||
4904 |   (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) | |||
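/* Illustrative sketch, not part of this header; assumes _mm_getcsr and
   _mm_setcsr from <xmmintrin.h>, which this header includes. Enables
   denormals-are-zero mode for the calling thread and restores the previous
   setting afterwards. The function name is hypothetical. */
#include <emmintrin.h>

static void daz_demo(void)
{
    unsigned int old_mode = _MM_GET_DENORMALS_ZERO_MODE();
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    /* ... SSE arithmetic now treats denormal inputs as zero ... */
    _MM_SET_DENORMALS_ZERO_MODE(old_mode);
}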
4905 | ||||
4906 | #endif /* __EMMINTRIN_H */ |