Bug Summary

File: /usr/lib/llvm-20/lib/clang/20/include/emmintrin.h
Warning: line 3462, column 10
Access to field '__v' results in a dereference of a null pointer (loaded from variable '__p')
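
In short: the analyzer traces a path on which the mask pointer 'pm' in
core_combine_over_u_sse2_mask is assumed to be null, is passed through
load_128_unaligned into _mm_loadu_si128, and is dereferenced inside
emmintrin.h when the intrinsic reads the field '__v' through '__p'. A
minimal sketch of the flagged pattern (hypothetical names, not the
pixman code):

    #include <emmintrin.h>

    /* Reduction of the diagnosed path: if 'mask' can be NULL on some
     * path, the unaligned load dereferences a null pointer. */
    static __m128i load4 (const __m128i *p)
    {
        return _mm_loadu_si128 (p); /* emmintrin.h reads p->__v here */
    }

    __m128i demo (const unsigned int *mask)
    {
        /* analyzer: 'mask' may be NULL and still reaches this load */
        return load4 ((const __m128i *) mask);
    }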

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name pixman-sse2.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/gfx/cairo/libpixman/src -fcoverage-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/gfx/cairo/libpixman/src -resource-dir /usr/lib/llvm-20/lib/clang/20 -include /var/lib/jenkins/workspace/firefox-scan-build/config/gcc_hidden.h -include /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -D HAVE_PTHREADS -D PACKAGE=mozpixman -D USE_X86_MMX -D USE_SSE2 -D USE_SSSE3 -D MOZ_HAS_MOZGLUE -D MOZILLA_INTERNAL_API -D IMPL_LIBXUL -D MOZ_SUPPORT_LEAKCHECKING -D STATIC_EXPORTABLE_JS_API -I /var/lib/jenkins/workspace/firefox-scan-build/gfx/cairo/libpixman/src -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/gfx/cairo/libpixman/src -I /var/lib/jenkins/workspace/firefox-scan-build/gfx/cairo/cairo/src -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/llvm-20/lib/clang/20/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=tautological-type-limit-compare -Wno-range-loop-analysis -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-unknown-warning-option -Wno-address -Wno-braced-scalar-init -Wno-missing-field-initializers -Wno-sign-compare -Wno-incompatible-pointer-types -Wno-unused -Wno-incompatible-pointer-types -Wno-tautological-compare -Wno-tautological-constant-out-of-range-compare -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig 
-D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-01-20-090804-167946-1 -x c /var/lib/jenkins/workspace/firefox-scan-build/gfx/cairo/libpixman/src/pixman-sse2.c

/var/lib/jenkins/workspace/firefox-scan-build/gfx/cairo/libpixman/src/pixman-sse2.c

1/*
2 * Copyright © 2008 Rodrigo Kumpera
3 * Copyright © 2008 André Tupinambá
4 *
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of Red Hat not be used in advertising or
10 * publicity pertaining to distribution of the software without specific,
11 * written prior permission. Red Hat makes no representations about the
12 * suitability of this software for any purpose. It is provided "as is"
13 * without express or implied warranty.
14 *
15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
22 * SOFTWARE.
23 *
24 * Author: Rodrigo Kumpera (kumpera@gmail.com)
25 * André Tupinambá (andrelrt@gmail.com)
26 *
27 * Based on work by Owen Taylor and Søren Sandmann
28 */
29#ifdef HAVE_CONFIG_H
30#include <pixman-config.h>
31#endif
32
33/* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */
34#define PSHUFD_IS_FAST 0
35
36#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
37#include <emmintrin.h> /* for SSE2 intrinsics */
38#include "pixman-private.h"
39#include "pixman-combine32.h"
40#include "pixman-inlines.h"
41
42static __m128i mask_0080;
43static __m128i mask_00ff;
44static __m128i mask_0101;
45static __m128i mask_ffff;
46static __m128i mask_ff000000;
47static __m128i mask_alpha;
48
49static __m128i mask_565_r;
50static __m128i mask_565_g1, mask_565_g2;
51static __m128i mask_565_b;
52static __m128i mask_red;
53static __m128i mask_green;
54static __m128i mask_blue;
55
56static __m128i mask_565_fix_rb;
57static __m128i mask_565_fix_g;
58
59static __m128i mask_565_rb;
60static __m128i mask_565_pack_multiplier;
61
62static force_inline __m128i
63unpack_32_1x128 (uint32_t data)
64{
65 return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
66}
67
68static force_inline void
69unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi)
70{
71 *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
72 *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
73}
74
75static force_inline __m128i
76unpack_565_to_8888 (__m128i lo)
77{
78 __m128i r, g, b, rb, t;
79
80 r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
81 g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
82 b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);
83
84 rb = _mm_or_si128 (r, b);
85 t = _mm_and_si128 (rb, mask_565_fix_rb);
86 t = _mm_srli_epi32 (t, 5);
87 rb = _mm_or_si128 (rb, t);
88
89 t = _mm_and_si128 (g, mask_565_fix_g);
90 t = _mm_srli_epi32 (t, 6);
91 g = _mm_or_si128 (g, t);
92
93 return _mm_or_si128 (rb, g);
94}
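
For reference, a scalar sketch of the same 565-to-8888 expansion (my
illustration, not part of the file): each channel is shifted into place
and its top bits are replicated into the newly opened low bits, so that
e.g. the 5-bit value 0x1f expands to 0xff rather than 0xf8:

    /* Scalar equivalent of unpack_565_to_8888 for one pixel (sketch;
     * assumes the masks hold the usual x8r8g8b8 channel positions). */
    static unsigned int expand_565_scalar (unsigned int p)
    {
        unsigned int r  = (p << 8) & 0x00f80000; /* 5 red bits   -> 19..23 */
        unsigned int g  = (p << 5) & 0x0000fc00; /* 6 green bits -> 10..15 */
        unsigned int b  = (p << 3) & 0x000000f8; /* 5 blue bits  ->  3..7  */
        unsigned int rb = r | b;

        rb |= (rb & 0x00e000e0) >> 5;  /* replicate top 3 bits of r and b */
        g  |= (g  & 0x0000c000) >> 6;  /* replicate top 2 bits of g       */
        return rb | g;
    }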
95
96static force_inline void
97unpack_565_128_4x128 (__m128i data,
98 __m128i* data0,
99 __m128i* data1,
100 __m128i* data2,
101 __m128i* data3)
102{
103 __m128i lo, hi;
104
105 lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
106 hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
107
108 lo = unpack_565_to_8888 (lo);
109 hi = unpack_565_to_8888 (hi);
110
111 unpack_128_2x128 (lo, data0, data1);
112 unpack_128_2x128 (hi, data2, data3);
113}
114
115static force_inline uint16_t
116pack_565_32_16 (uint32_t pixel)
117{
118 return (uint16_t) (((pixel >> 8) & 0xf800) |
119 ((pixel >> 5) & 0x07e0) |
120 ((pixel >> 3) & 0x001f));
121}
122
123static force_inline __m128i
124pack_2x128_128 (__m128i lo, __m128i hi)
125{
126 return _mm_packus_epi16 (lo, hi);
127}
128
129static force_inline __m128i
130pack_565_2packedx128_128 (__m128i lo, __m128i hi)
131{
132 __m128i rb0 = _mm_and_si128 (lo, mask_565_rb);
133 __m128i rb1 = _mm_and_si128 (hi, mask_565_rb);
134
135 __m128i t0 = _mm_madd_epi16 (rb0, mask_565_pack_multiplier);
136 __m128i t1 = _mm_madd_epi16 (rb1, mask_565_pack_multiplier);
137
138 __m128i g0 = _mm_and_si128 (lo, mask_green);
139 __m128i g1 = _mm_and_si128 (hi, mask_green);
140
141 t0 = _mm_or_si128 (t0, g0);
142 t1 = _mm_or_si128 (t1, g1);
143
144 /* Simulates _mm_packus_epi32 */
145 t0 = _mm_slli_epi32 (t0, 16 - 5);
146 t1 = _mm_slli_epi32 (t1, 16 - 5);
147 t0 = _mm_srai_epi32 (t0, 16);
148 t1 = _mm_srai_epi32 (t1, 16);
149 return _mm_packs_epi32 (t0, t1);
150}
151
152static force_inline __m128i
153pack_565_2x128_128 (__m128i lo, __m128i hi)
154{
155 __m128i data;
156 __m128i r, g1, g2, b;
157
158 data = pack_2x128_128 (lo, hi);
159
160 r = _mm_and_si128 (data, mask_565_r);
161 g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
162 g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
163 b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);
164
165 return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
166}
167
168static force_inline __m128i
169pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
170{
171 return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
172 pack_565_2x128_128 (*xmm2, *xmm3));
173}
174
175static force_inline int
176is_opaque (__m128i x)
177{
178 __m128i ffs = _mm_cmpeq_epi8 (x, x);
179
180 return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
181}
182
183static force_inline int
184is_zero (__m128i x)
185{
186 return _mm_movemask_epi8 (
187 _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
188}
189
190static force_inline int
191is_transparent (__m128i x)
192{
193 return (_mm_movemask_epi8 (
194 _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
195}
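
A note on the 0x8888 constant used by is_opaque and is_transparent:
_mm_movemask_epi8 packs the sign bit of each of the 16 bytes into a
16-bit mask, and the alpha bytes of four a8r8g8b8 pixels sit at byte
offsets 3, 7, 11 and 15, i.e. exactly the bits of 0x8888. Testing only
those bits therefore checks all four alpha channels at once. A small
self-check (my sketch):

    #include <emmintrin.h>
    #include <assert.h>

    static void movemask_demo (void)
    {
        __m128i px  = _mm_set1_epi32 ((int) 0xff000000);    /* alpha 0xff */
        __m128i ffs = _mm_cmpeq_epi8 (px, px);              /* all ones   */
        int bits = _mm_movemask_epi8 (_mm_cmpeq_epi8 (px, ffs));

        assert ((bits & 0x8888) == 0x8888);                 /* opaque     */
    }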
196
197static force_inline __m128i
198expand_pixel_32_1x128 (uint32_t data)
199{
200 return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0));
201}
202
203static force_inline __m128i
204expand_alpha_1x128 (__m128i data)
205{
206 return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,
207 _MM_SHUFFLE (3, 3, 3, 3)),
208 _MM_SHUFFLE (3, 3, 3, 3));
209}
210
211static force_inline void
212expand_alpha_2x128 (__m128i data_lo,
213 __m128i data_hi,
214 __m128i* alpha_lo,
215 __m128i* alpha_hi)
216{
217 __m128i lo, hi;
218
219 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3));
220 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3));
221
222 *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3));
223 *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3));
224}
225
226static force_inline void
227expand_alpha_rev_2x128 (__m128i data_lo,
228 __m128i data_hi,
229 __m128i* alpha_lo,
230 __m128i* alpha_hi)
231{
232 __m128i lo, hi;
233
234 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0));
235 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0));
236 *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0));
237 *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0));
238}
239
240static force_inline void
241pix_multiply_2x128 (__m128i* data_lo,
242 __m128i* data_hi,
243 __m128i* alpha_lo,
244 __m128i* alpha_hi,
245 __m128i* ret_lo,
246 __m128i* ret_hi)
247{
248 __m128i lo, hi;
249
250 lo = _mm_mullo_epi16 (*data_lo, *alpha_lo);
251 hi = _mm_mullo_epi16 (*data_hi, *alpha_hi);
252 lo = _mm_adds_epu16 (lo, mask_0080);
253 hi = _mm_adds_epu16 (hi, mask_0080);
254 *ret_lo = _mm_mulhi_epu16 (lo, mask_0101);
255 *ret_hi = _mm_mulhi_epu16 (hi, mask_0101);
256}
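
pix_multiply_2x128 computes the rounded byte product x*a/255 in each
16-bit lane with the classic trick: add 0x0080, multiply by 0x0101 and
keep the high 16 bits, since ((t + 128) * 257) >> 16 equals the
round-to-nearest t/255 for every t in [0, 255*255]. A brute-force scalar
check of the identity (my sketch):

    #include <assert.h>
    #include <stdint.h>

    static void div255_demo (void)
    {
        for (uint32_t x = 0; x <= 255; x++)
        {
            for (uint32_t a = 0; a <= 255; a++)
            {
                uint32_t t     = x * a;
                uint32_t trick = ((t + 0x80) * 0x101) >> 16;
                uint32_t exact = (t + 127) / 255;   /* rounded division */

                assert (trick == exact);
            }
        }
    }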
257
258static force_inline void
259pix_add_multiply_2x128 (__m128i* src_lo,
260 __m128i* src_hi,
261 __m128i* alpha_dst_lo,
262 __m128i* alpha_dst_hi,
263 __m128i* dst_lo,
264 __m128i* dst_hi,
265 __m128i* alpha_src_lo,
266 __m128i* alpha_src_hi,
267 __m128i* ret_lo,
268 __m128i* ret_hi)
269{
270 __m128i t1_lo, t1_hi;
271 __m128i t2_lo, t2_hi;
272
273 pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi);
274 pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi);
275
276 *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo);
277 *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi);
278}
279
280static force_inline void
281negate_2x128 (__m128i data_lo,
282 __m128i data_hi,
283 __m128i* neg_lo,
284 __m128i* neg_hi)
285{
286 *neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
287 *neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
288}
289
290static force_inline void
291invert_colors_2x128 (__m128i data_lo,
292 __m128i data_hi,
293 __m128i* inv_lo,
294 __m128i* inv_hi)
295{
296 __m128i lo, hi;
297
298 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2));
299 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2));
300 *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2));
301 *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2));
302}
303
304static force_inline void
305over_2x128 (__m128i* src_lo,
306 __m128i* src_hi,
307 __m128i* alpha_lo,
308 __m128i* alpha_hi,
309 __m128i* dst_lo,
310 __m128i* dst_hi)
311{
312 __m128i t1, t2;
313
314 negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
315
316 pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
317
318 *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo);
319 *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi);
320}
321
322static force_inline void
323over_rev_non_pre_2x128 (__m128i src_lo,
324 __m128i src_hi,
325 __m128i* dst_lo,
326 __m128i* dst_hi)
327{
328 __m128i lo, hi;
329 __m128i alpha_lo, alpha_hi;
330
331 expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi);
332
333 lo = _mm_or_si128 (alpha_lo, mask_alpha);
334 hi = _mm_or_si128 (alpha_hi, mask_alpha);
335
336 invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi);
337
338 pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi);
339
340 over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi);
341}
342
343static force_inline void
344in_over_2x128 (__m128i* src_lo,
345 __m128i* src_hi,
346 __m128i* alpha_lo,
347 __m128i* alpha_hi,
348 __m128i* mask_lo,
349 __m128i* mask_hi,
350 __m128i* dst_lo,
351 __m128i* dst_hi)
352{
353 __m128i s_lo, s_hi;
354 __m128i a_lo, a_hi;
355
356 pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
357 pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
358
359 over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
360}
361
362/* load 4 pixels from a 16-byte boundary aligned address */
363static force_inline __m128i
364load_128_aligned (__m128i* src)
365{
366 return _mm_load_si128 (src);
367}
368
369/* load 4 pixels from an unaligned address */
370static force_inline __m128i
371load_128_unaligned (const __m128i* src)
372{
373 return _mm_loadu_si128 (src);
[12] Passing null pointer value via 1st parameter '__p'
[13] Calling '_mm_loadu_si128'
374}
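
Events [12] and [13] above cross into the system header: in clang's
emmintrin.h, _mm_loadu_si128 performs the unaligned access through a
packed wrapper struct, roughly (paraphrased from the clang 20 header;
exact spelling may differ):

    static __inline__ __m128i
    _mm_loadu_si128 (__m128i_u const *__p)
    {
        struct __loadu_si128 {
            __m128i_u __v;
        } __attribute__ ((__packed__, __may_alias__));

        return ((const struct __loadu_si128 *) __p)->__v;
    }

The read of '__v' through '__p' is the dereference reported at line
3462, column 10 of emmintrin.h.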
375
376/* save 4 pixels on a 16-byte boundary aligned address */
377static force_inline void
378save_128_aligned (__m128i* dst,
379 __m128i data)
380{
381 _mm_store_si128 (dst, data);
382}
383
384static force_inline __m128i
385load_32_1x128 (uint32_t data)
386{
387 return _mm_cvtsi32_si128 (data);
388}
389
390static force_inline __m128i
391expand_alpha_rev_1x128 (__m128i data)
392{
393 return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
394}
395
396static force_inline __m128i
397expand_pixel_8_1x128 (uint8_t data)
398{
399 return _mm_shufflelo_epi16 (
400 unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
401}
402
403static force_inline __m128i
404pix_multiply_1x128 (__m128i data,
405 __m128i alpha)
406{
407 return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha),
408 mask_0080),
409 mask_0101);
410}
411
412static force_inline __m128i
413pix_add_multiply_1x128 (__m128i* src,
414 __m128i* alpha_dst,
415 __m128i* dst,
416 __m128i* alpha_src)
417{
418 __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst);
419 __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src);
420
421 return _mm_adds_epu8 (t1, t2);
422}
423
424static force_inline __m128i
425negate_1x128 (__m128i data)
426{
427 return _mm_xor_si128 (data, mask_00ff);
428}
429
430static force_inline __m128i
431invert_colors_1x128 (__m128i data)
432{
433 return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
434}
435
436static force_inline __m128i
437over_1x128 (__m128i src, __m128i alpha, __m128i dst)
438{
439 return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
440}
441
442static force_inline __m128i
443in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
444{
445 return over_1x128 (pix_multiply_1x128 (*src, *mask),
446 pix_multiply_1x128 (*alpha, *mask),
447 *dst);
448}
449
450static force_inline __m128i
451over_rev_non_pre_1x128 (__m128i src, __m128i dst)
452{
453 __m128i alpha = expand_alpha_1x128 (src);
454
455 return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src),
456 _mm_or_si128 (alpha, mask_alpha)),
457 alpha,
458 dst);
459}
460
461static force_inline uint32_t
462pack_1x128_32 (__m128i data)
463{
464 return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
465}
466
467static force_inline __m128i
468expand565_16_1x128 (uint16_t pixel)
469{
470 __m128i m = _mm_cvtsi32_si128 (pixel);
471
472 m = unpack_565_to_8888 (m);
473
474 return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
475}
476
477static force_inline uint32_t
478core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
479{
480 uint8_t a;
481 __m128i xmms;
482
483 a = src >> 24;
484
485 if (a == 0xff)
486 {
487 return src;
488 }
489 else if (src)
490 {
491 xmms = unpack_32_1x128 (src);
492 return pack_1x128_32 (
493 over_1x128 (xmms, expand_alpha_1x128 (xmms),
494 unpack_32_1x128 (dst)));
495 }
496
497 return dst;
498}
499
500static force_inline uint32_t
501combine1 (const uint32_t *ps, const uint32_t *pm)
502{
503 uint32_t s;
504 memcpy(&s, ps, sizeof(uint32_t));
505
506 if (pm)
507 {
508 __m128i ms, mm;
509
510 mm = unpack_32_1x128 (*pm);
511 mm = expand_alpha_1x128 (mm);
512
513 ms = unpack_32_1x128 (s);
514 ms = pix_multiply_1x128 (ms, mm);
515
516 s = pack_1x128_32 (ms);
517 }
518
519 return s;
520}
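
combine1 scales every channel of the source pixel by the mask's alpha
using the same rounded division by 255. A plain scalar equivalent (my
sketch; combine1 itself takes a pointer to the source pixel):

    #include <stdint.h>

    static uint32_t combine1_scalar (uint32_t s, const uint32_t *pm)
    {
        uint32_t a, r = 0;

        if (!pm)
            return s;

        a = *pm >> 24;  /* mask alpha */
        for (int shift = 0; shift < 32; shift += 8)
        {
            uint32_t t = ((s >> shift) & 0xff) * a;
            r |= (((t + 0x80) * 0x101) >> 16) << shift;
        }
        return r;
    }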
521
522static force_inline __m128i
523combine4 (const __m128i *ps, const __m128i *pm)
524{
525 __m128i xmm_src_lo, xmm_src_hi;
526 __m128i xmm_msk_lo, xmm_msk_hi;
527 __m128i s;
528
529 if (pm)
530 {
531 xmm_msk_lo = load_128_unaligned (pm);
532
533 if (is_transparent (xmm_msk_lo))
534 return _mm_setzero_si128 ();
535 }
536
537 s = load_128_unaligned (ps);
538
539 if (pm)
540 {
541 unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
542 unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);
543
544 expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);
545
546 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
547 &xmm_msk_lo, &xmm_msk_hi,
548 &xmm_src_lo, &xmm_src_hi);
549
550 s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
551 }
552
553 return s;
554}
555
556static force_inline void
557core_combine_over_u_sse2_mask (uint32_t * pd,
558 const uint32_t* ps,
559 const uint32_t* pm,
560 int w)
561{
562 uint32_t s, d;
563
564 /* Align dst on a 16-byte boundary */
565 while (w && ((uintptr_t)pd & 15))
[1] Assuming 'w' is not equal to 0
[2] Loop condition is true. Entering loop body
[6] Assuming 'w' is not equal to 0
[7] Loop condition is false. Execution continues on line 578
566 {
567 d = *pd;
568 s = combine1 (ps, pm);
569
570 if (s)
[3] Assuming 's' is 0
[4] Taking false branch
571 *pd = core_combine_over_u_pixel_sse2 (s, d);
572 pd++;
573 ps++;
574 pm++;
[5] Null pointer value stored to 'pm'
575 w--;
576 }
577
578 while (w >= 4)
[8] Assuming 'w' is >= 4
[9] Loop condition is true. Entering loop body
579 {
580 __m128i mask = load_128_unaligned ((__m128i *)pm);
[10] Passing null pointer value via 1st parameter 'src'
[11] Calling 'load_128_unaligned'
581
582 if (!is_zero (mask))
583 {
584 __m128i src;
585 __m128i src_hi, src_lo;
586 __m128i mask_hi, mask_lo;
587 __m128i alpha_hi, alpha_lo;
588
589 src = load_128_unaligned ((__m128i *)ps);
590
591 if (is_opaque (_mm_and_si128 (src, mask)))
592 {
593 save_128_aligned ((__m128i *)pd, src);
594 }
595 else
596 {
597 __m128i dst = load_128_aligned ((__m128i *)pd);
598 __m128i dst_hi, dst_lo;
599
600 unpack_128_2x128 (mask, &mask_lo, &mask_hi);
601 unpack_128_2x128 (src, &src_lo, &src_hi);
602
603 expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi);
604 pix_multiply_2x128 (&src_lo, &src_hi,
605 &mask_lo, &mask_hi,
606 &src_lo, &src_hi);
607
608 unpack_128_2x128 (dst, &dst_lo, &dst_hi);
609
610 expand_alpha_2x128 (src_lo, src_hi,
611 &alpha_lo, &alpha_hi);
612
613 over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
614 &dst_lo, &dst_hi);
615
616 save_128_aligned (
617 (__m128i *)pd,
618 pack_2x128_128 (dst_lo, dst_hi));
619 }
620 }
621
622 pm += 4;
623 ps += 4;
624 pd += 4;
625 w -= 4;
626 }
627 while (w)
628 {
629 d = *pd;
630 s = combine1 (ps, pm);
631
632 if (s)
633 *pd = core_combine_over_u_pixel_sse2 (s, d);
634 pd++;
635 ps++;
636 pm++;
637
638 w--;
639 }
640}
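
Reading the path above: sse2_combine_over_u (line 720 below) only calls
this function when pm is non-null, but the 'if (pm)' branch inside
combine1 lets the analyzer assume pm == NULL during the head loop
(event [5]), and the w >= 4 loop then loads through pm unconditionally
(events [10]-[13]). One way to record the caller's guarantee so the
checker cannot make that assumption (my sketch, not a pixman patch):

    #include <assert.h>
    #include <stdint.h>

    static void
    core_combine_over_u_sse2_mask_checked (uint32_t *pd, const uint32_t *ps,
                                           const uint32_t *pm, int w)
    {
        /* caller dispatches here only when pm != NULL */
        assert (pm != NULL);
        /* ... body unchanged ... */
    }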
641
642static force_inline void
643core_combine_over_u_sse2_no_mask (uint32_t * pd,
644 const uint32_t* ps,
645 int w)
646{
647 uint32_t s, d;
648
649 /* Align dst on a 16-byte boundary */
650 while (w && ((uintptr_t)pd & 15))
651 {
652 d = *pd;
653 s = *ps;
654
655 if (s)
656 *pd = core_combine_over_u_pixel_sse2 (s, d);
657 pd++;
658 ps++;
659 w--;
660 }
661
662 while (w >= 4)
663 {
664 __m128i src;
665 __m128i src_hi, src_lo, dst_hi, dst_lo;
666 __m128i alpha_hi, alpha_lo;
667
668 src = load_128_unaligned ((__m128i *)ps);
669
670 if (!is_zero (src))
671 {
672 if (is_opaque (src))
673 {
674 save_128_aligned ((__m128i *)pd, src);
675 }
676 else
677 {
678 __m128i dst = load_128_aligned ((__m128i *)pd);
679
680 unpack_128_2x128 (src, &src_lo, &src_hi);
681 unpack_128_2x128 (dst, &dst_lo, &dst_hi);
682
683 expand_alpha_2x128 (src_lo, src_hi,
684 &alpha_lo, &alpha_hi);
685 over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
686 &dst_lo, &dst_hi);
687
688 save_128_aligned (
689 (__m128i *)pd,
690 pack_2x128_128 (dst_lo, dst_hi));
691 }
692 }
693
694 ps += 4;
695 pd += 4;
696 w -= 4;
697 }
698 while (w)
699 {
700 d = *pd;
701 s = *ps;
702
703 if (s)
704 *pd = core_combine_over_u_pixel_sse2 (s, d);
705 pd++;
706 ps++;
707
708 w--;
709 }
710}
711
712static force_inline void
713sse2_combine_over_u (pixman_implementation_t *imp,
714 pixman_op_t op,
715 uint32_t * pd,
716 const uint32_t * ps,
717 const uint32_t * pm,
718 int w)
719{
720 if (pm)
721 core_combine_over_u_sse2_mask (pd, ps, pm, w);
722 else
723 core_combine_over_u_sse2_no_mask (pd, ps, w);
724}
725
726static void
727sse2_combine_over_reverse_u (pixman_implementation_t *imp,
728 pixman_op_t op,
729 uint32_t * pd,
730 const uint32_t * ps,
731 const uint32_t * pm,
732 int w)
733{
734 uint32_t s, d;
735
736 __m128i xmm_dst_lo, xmm_dst_hi;
737 __m128i xmm_src_lo, xmm_src_hi;
738 __m128i xmm_alpha_lo, xmm_alpha_hi;
739
740 /* Align dst on a 16-byte boundary */
741 while (w &&
742 ((uintptr_t)pd & 15))
743 {
744 d = *pd;
745 s = combine1 (ps, pm);
746
747 *pd++ = core_combine_over_u_pixel_sse2 (d, s);
748 w--;
749 ps++;
750 if (pm)
751 pm++;
752 }
753
754 while (w >= 4)
755 {
756 /* I'm loading unaligned because I'm not sure
757 * about the address alignment.
758 */
759 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
760 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
761
762 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
763 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
764
765 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
766 &xmm_alpha_lo, &xmm_alpha_hi);
767
768 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
769 &xmm_alpha_lo, &xmm_alpha_hi,
770 &xmm_src_lo, &xmm_src_hi);
771
772 /* rebuild the 4 pixel data and save */
773 save_128_aligned ((__m128i*)pd,
774 pack_2x128_128 (xmm_src_lo, xmm_src_hi));
775
776 w -= 4;
777 ps += 4;
778 pd += 4;
779
780 if (pm)
781 pm += 4;
782 }
783
784 while (w)
785 {
786 d = *pd;
787 s = combine1 (ps, pm);
788
789 *pd++ = core_combine_over_u_pixel_sse2 (d, s);
790 ps++;
791 w--;
792 if (pm)
793 pm++;
794 }
795}
796
797static force_inline uint32_t
798core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst)
799{
800 uint32_t maska = src >> 24;
801
802 if (maska == 0)
803 {
804 return 0;
805 }
806 else if (maska != 0xff)
807 {
808 return pack_1x128_32 (
809 pix_multiply_1x128 (unpack_32_1x128 (dst),
810 expand_alpha_1x128 (unpack_32_1x128 (src))));
811 }
812
813 return dst;
814}
815
816static void
817sse2_combine_in_u (pixman_implementation_t *imp,
818 pixman_op_t op,
819 uint32_t * pd,
820 const uint32_t * ps,
821 const uint32_t * pm,
822 int w)
823{
824 uint32_t s, d;
825
826 __m128i xmm_src_lo, xmm_src_hi;
827 __m128i xmm_dst_lo, xmm_dst_hi;
828
829 while (w && ((uintptr_t)pd & 15))
830 {
831 s = combine1 (ps, pm);
832 d = *pd;
833
834 *pd++ = core_combine_in_u_pixel_sse2 (d, s);
835 w--;
836 ps++;
837 if (pm)
838 pm++;
839 }
840
841 while (w >= 4)
842 {
843 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
844 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
845
846 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
847 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
848
849 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
850 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
851 &xmm_dst_lo, &xmm_dst_hi,
852 &xmm_dst_lo, &xmm_dst_hi);
853
854 save_128_aligned ((__m128i*)pd,
855 pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
856
857 ps += 4;
858 pd += 4;
859 w -= 4;
860 if (pm)
861 pm += 4;
862 }
863
864 while (w)
865 {
866 s = combine1 (ps, pm);
867 d = *pd;
868
869 *pd++ = core_combine_in_u_pixel_sse2 (d, s);
870 w--;
871 ps++;
872 if (pm)
873 pm++;
874 }
875}
876
877static void
878sse2_combine_in_reverse_u (pixman_implementation_t *imp,
879 pixman_op_t op,
880 uint32_t * pd,
881 const uint32_t * ps,
882 const uint32_t * pm,
883 int w)
884{
885 uint32_t s, d;
886
887 __m128i xmm_src_lo, xmm_src_hi;
888 __m128i xmm_dst_lo, xmm_dst_hi;
889
890 while (w && ((uintptr_t)pd & 15))
891 {
892 s = combine1 (ps, pm);
893 d = *pd;
894
895 *pd++ = core_combine_in_u_pixel_sse2 (s, d);
896 ps++;
897 w--;
898 if (pm)
899 pm++;
900 }
901
902 while (w >= 4)
903 {
904 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
905 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
906
907 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
908 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
909
910 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
911 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
912 &xmm_src_lo, &xmm_src_hi,
913 &xmm_dst_lo, &xmm_dst_hi);
914
915 save_128_aligned (
916 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
917
918 ps += 4;
919 pd += 4;
920 w -= 4;
921 if (pm)
922 pm += 4;
923 }
924
925 while (w)
926 {
927 s = combine1 (ps, pm);
928 d = *pd;
929
930 *pd++ = core_combine_in_u_pixel_sse2 (s, d);
931 w--;
932 ps++;
933 if (pm)
934 pm++;
935 }
936}
937
938static void
939sse2_combine_out_reverse_u (pixman_implementation_t *imp,
940 pixman_op_t op,
941 uint32_t * pd,
942 const uint32_t * ps,
943 const uint32_t * pm,
944 int w)
945{
946 while (w && ((uintptr_t)pd & 15))
947 {
948 uint32_t s = combine1 (ps, pm);
949 uint32_t d = *pd;
950
951 *pd++ = pack_1x128_32 (
952 pix_multiply_1x128 (
953 unpack_32_1x128 (d), negate_1x128 (
954 expand_alpha_1x128 (unpack_32_1x128 (s)))));
955
956 if (pm)
957 pm++;
958 ps++;
959 w--;
960 }
961
962 while (w >= 4)
963 {
964 __m128i xmm_src_lo, xmm_src_hi;
965 __m128i xmm_dst_lo, xmm_dst_hi;
966
967 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
968 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
969
970 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
971 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
972
973 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
974 negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
975
976 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
977 &xmm_src_lo, &xmm_src_hi,
978 &xmm_dst_lo, &xmm_dst_hi);
979
980 save_128_aligned (
981 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
982
983 ps += 4;
984 pd += 4;
985 if (pm)
986 pm += 4;
987
988 w -= 4;
989 }
990
991 while (w)
992 {
993 uint32_t s = combine1 (ps, pm);
994 uint32_t d = *pd;
995
996 *pd++ = pack_1x128_32 (
997 pix_multiply_1x128 (
998 unpack_32_1x128 (d), negate_1x128 (
999 expand_alpha_1x128 (unpack_32_1x128 (s)))));
1000 ps++;
1001 if (pm)
1002 pm++;
1003 w--;
1004 }
1005}
1006
1007static void
1008sse2_combine_out_u (pixman_implementation_t *imp,
1009 pixman_op_t op,
1010 uint32_t * pd,
1011 const uint32_t * ps,
1012 const uint32_t * pm,
1013 int w)
1014{
1015 while (w && ((uintptr_t)pd & 15))
1016 {
1017 uint32_t s = combine1 (ps, pm);
1018 uint32_t d = *pd;
1019
1020 *pd++ = pack_1x128_32 (
1021 pix_multiply_1x128 (
1022 unpack_32_1x128 (s), negate_1x128 (
1023 expand_alpha_1x128 (unpack_32_1x128 (d)))));
1024 w--;
1025 ps++;
1026 if (pm)
1027 pm++;
1028 }
1029
1030 while (w >= 4)
1031 {
1032 __m128i xmm_src_lo, xmm_src_hi;
1033 __m128i xmm_dst_lo, xmm_dst_hi;
1034
1035 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
1036 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
1037
1038 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1039 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1040
1041 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1042 negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1043
1044 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1045 &xmm_dst_lo, &xmm_dst_hi,
1046 &xmm_dst_lo, &xmm_dst_hi);
1047
1048 save_128_aligned (
1049 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1050
1051 ps += 4;
1052 pd += 4;
1053 w -= 4;
1054 if (pm)
1055 pm += 4;
1056 }
1057
1058 while (w)
1059 {
1060 uint32_t s = combine1 (ps, pm);
1061 uint32_t d = *pd;
1062
1063 *pd++ = pack_1x128_32 (
1064 pix_multiply_1x128 (
1065 unpack_32_1x128 (s), negate_1x128 (
1066 expand_alpha_1x128 (unpack_32_1x128 (d)))));
1067 w--;
1068 ps++;
1069 if (pm)
1070 pm++;
1071 }
1072}
1073
1074static force_inline uint32_t
1075core_combine_atop_u_pixel_sse2 (uint32_t src,
1076 uint32_t dst)
1077{
1078 __m128i s = unpack_32_1x128 (src);
1079 __m128i d = unpack_32_1x128 (dst);
1080
1081 __m128i sa = negate_1x128 (expand_alpha_1x128 (s));
1082 __m128i da = expand_alpha_1x128 (d);
1083
1084 return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
1085}
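
This is the per-channel Porter-Duff ATOP operator; written out as a
formula (standard definition, stated here for clarity):

    atop(s, d) = s * alpha_d + d * (1 - alpha_s)

with sa holding the negated source alpha and da the destination alpha
in the code above.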
1086
1087static void
1088sse2_combine_atop_u (pixman_implementation_t *imp,
1089 pixman_op_t op,
1090 uint32_t * pd,
1091 const uint32_t * ps,
1092 const uint32_t * pm,
1093 int w)
1094{
1095 uint32_t s, d;
1096
1097 __m128i xmm_src_lo, xmm_src_hi;
1098 __m128i xmm_dst_lo, xmm_dst_hi;
1099 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1100 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1101
1102 while (w && ((uintptr_t)pd & 15))
1103 {
1104 s = combine1 (ps, pm);
1105 d = *pd;
1106
1107 *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
1108 w--;
1109 ps++;
1110 if (pm)
1111 pm++;
1112 }
1113
1114 while (w >= 4)
1115 {
1116 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
1117 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
1118
1119 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1120 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1121
1122 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1123 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1124 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1125 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1126
1127 negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
1128 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1129
1130 pix_add_multiply_2x128 (
1131 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1132 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1133 &xmm_dst_lo, &xmm_dst_hi);
1134
1135 save_128_aligned (
1136 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1137
1138 ps += 4;
1139 pd += 4;
1140 w -= 4;
1141 if (pm)
1142 pm += 4;
1143 }
1144
1145 while (w)
1146 {
1147 s = combine1 (ps, pm);
1148 d = *pd;
1149
1150 *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
1151 w--;
1152 ps++;
1153 if (pm)
1154 pm++;
1155 }
1156}
1157
1158static force_inline uint32_t
1159core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
1160 uint32_t dst)
1161{
1162 __m128i s = unpack_32_1x128 (src);
1163 __m128i d = unpack_32_1x128 (dst);
1164
1165 __m128i sa = expand_alpha_1x128 (s);
1166 __m128i da = negate_1x128 (expand_alpha_1x128 (d));
1167
1168 return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
1169}
1170
1171static void
1172sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
1173 pixman_op_t op,
1174 uint32_t * pd,
1175 const uint32_t * ps,
1176 const uint32_t * pm,
1177 int w)
1178{
1179 uint32_t s, d;
1180
1181 __m128i xmm_src_lo, xmm_src_hi;
1182 __m128i xmm_dst_lo, xmm_dst_hi;
1183 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1184 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1185
1186 while (w && ((uintptr_t)pd & 15))
1187 {
1188 s = combine1 (ps, pm);
1189 d = *pd;
1190
1191 *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
1192 ps++;
1193 w--;
1194 if (pm)
1195 pm++;
1196 }
1197
1198 while (w >= 4)
1199 {
1200 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
1201 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
1202
1203 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1204 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1205
1206 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1207 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1208 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1209 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1210
1211 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
1212 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1213
1214 pix_add_multiply_2x128 (
1215 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1216 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1217 &xmm_dst_lo, &xmm_dst_hi);
1218
1219 save_128_aligned (
1220 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1221
1222 ps += 4;
1223 pd += 4;
1224 w -= 4;
1225 if (pm)
1226 pm += 4;
1227 }
1228
1229 while (w)
1230 {
1231 s = combine1 (ps, pm);
1232 d = *pd;
1233
1234 *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
1235 ps++;
1236 w--;
1237 if (pm)
1238 pm++;
1239 }
1240}
1241
1242static force_inline uint32_t
1243core_combine_xor_u_pixel_sse2 (uint32_t src,
1244 uint32_t dst)
1245{
1246 __m128i s = unpack_32_1x128 (src);
1247 __m128i d = unpack_32_1x128 (dst);
1248
1249 __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d));
1250 __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s));
1251
1252 return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
1253}
1254
1255static void
1256sse2_combine_xor_u (pixman_implementation_t *imp,
1257 pixman_op_t op,
1258 uint32_t * dst,
1259 const uint32_t * src,
1260 const uint32_t * mask,
1261 int width)
1262{
1263 int w = width;
1264 uint32_t s, d;
1265 uint32_t* pd = dst;
1266 const uint32_t* ps = src;
1267 const uint32_t* pm = mask;
1268
1269 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
1270 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
1271 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1272 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1273
1274 while (w && ((uintptr_t)pd & 15))
1275 {
1276 s = combine1 (ps, pm);
1277 d = *pd;
1278
1279 *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
1280 w--;
1281 ps++;
1282 if (pm)
1283 pm++;
1284 }
1285
1286 while (w >= 4)
1287 {
1288 xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
1289 xmm_dst = load_128_aligned ((__m128i*) pd);
1290
1291 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
1292 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
1293
1294 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1295 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1296 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1297 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1298
1299 negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
1300 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1301 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
1302 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1303
1304 pix_add_multiply_2x128 (
1305 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1306 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1307 &xmm_dst_lo, &xmm_dst_hi);
1308
1309 save_128_aligned (
1310 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1311
1312 ps += 4;
1313 pd += 4;
1314 w -= 4;
1315 if (pm)
1316 pm += 4;
1317 }
1318
1319 while (w)
1320 {
1321 s = combine1 (ps, pm);
1322 d = *pd;
1323
1324 *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
1325 w--;
1326 ps++;
1327 if (pm)
1328 pm++;
1329 }
1330}
1331
1332static force_inline void
1333sse2_combine_add_u (pixman_implementation_t *imp,
1334 pixman_op_t op,
1335 uint32_t * dst,
1336 const uint32_t * src,
1337 const uint32_t * mask,
1338 int width)
1339{
1340 int w = width;
1341 uint32_t s, d;
1342 uint32_t* pd = dst;
1343 const uint32_t* ps = src;
1344 const uint32_t* pm = mask;
1345
1346 while (w && (uintptr_t)pd & 15)
1347 {
1348 s = combine1 (ps, pm);
1349 d = *pd;
1350
1351 ps++;
1352 if (pm)
1353 pm++;
1354 *pd++ = _mm_cvtsi128_si32 (
1355 _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
1356 w--;
1357 }
1358
1359 while (w >= 4)
1360 {
1361 __m128i s;
1362
1363 s = combine4 ((__m128i*)ps, (__m128i*)pm);
1364
1365 save_128_aligned (
1366 (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd)));
1367
1368 pd += 4;
1369 ps += 4;
1370 if (pm)
1371 pm += 4;
1372 w -= 4;
1373 }
1374
1375 while (w--)
1376 {
1377 s = combine1 (ps, pm);
1378 d = *pd;
1379
1380 ps++;
1381 *pd++ = _mm_cvtsi128_si32 (
1382 _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
1383 if (pm)
1384 pm++;
1385 }
1386}
1387
1388static force_inline uint32_t
1389core_combine_saturate_u_pixel_sse2 (uint32_t src,
1390 uint32_t dst)
1391{
1392 __m128i ms = unpack_32_1x128 (src);
1393 __m128i md = unpack_32_1x128 (dst);
1394 uint32_t sa = src >> 24;
1395 uint32_t da = ~dst >> 24;
1396
1397 if (sa > da)
1398 {
1399 ms = pix_multiply_1x128 (
1400 ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24)));
1401 }
1402
1403 return pack_1x128_32 (_mm_adds_epu16 (md, ms));
1404}
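
The DIV_UN8 macro used at line 1400 expands to pixman's rounded byte
division:

    DIV_UN8 (da, sa) = (da * 0xff + sa / 2) / sa

so the source channels are rescaled by da/sa (in 8-bit fixed point,
rounded) whenever the source alpha exceeds the destination's remaining
headroom.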
1405
1406static void
1407sse2_combine_saturate_u (pixman_implementation_t *imp,
1408 pixman_op_t op,
1409 uint32_t * pd,
1410 const uint32_t * ps,
1411 const uint32_t * pm,
1412 int w)
1413{
1414 uint32_t s, d;
1415
1416 uint32_t pack_cmp;
1417 __m128i xmm_src, xmm_dst;
1418
1419 while (w && (uintptr_t)pd & 15)
1420 {
1421 s = combine1 (ps, pm);
1422 d = *pd;
1423
1424 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1425 w--;
1426 ps++;
1427 if (pm)
1428 pm++;
1429 }
1430
1431 while (w >= 4)
1432 {
1433 xmm_dst = load_128_aligned ((__m128i*)pd);
1434 xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
1435
1436 pack_cmp = _mm_movemask_epi8 (
1437 _mm_cmpgt_epi32 (
1438 _mm_srli_epi32 (xmm_src, 24),
1439 _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
1440
1441 /* if some alpha src is greater than respective ~alpha dst */
1442 if (pack_cmp)
1443 {
1444 s = combine1 (ps++, pm);
1445 d = *pd;
1446 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1447 if (pm)
1448 pm++;
1449
1450 s = combine1 (ps++, pm);
1451 d = *pd;
1452 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1453 if (pm)
1454 pm++;
1455
1456 s = combine1 (ps++, pm);
1457 d = *pd;
1458 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1459 if (pm)
1460 pm++;
1461
1462 s = combine1 (ps++, pm);
1463 d = *pd;
1464 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1465 if (pm)
1466 pm++;
1467 }
1468 else
1469 {
1470 save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
1471
1472 pd += 4;
1473 ps += 4;
1474 if (pm)
1475 pm += 4;
1476 }
1477
1478 w -= 4;
1479 }
1480
1481 while (w--)
1482 {
1483 s = combine1 (ps, pm);
1484 d = *pd;
1485
1486 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1487 ps++;
1488 if (pm)
1489 pm++;
1490 }
1491}
1492
1493static void
1494sse2_combine_src_ca (pixman_implementation_t *imp,
1495 pixman_op_t op,
1496 uint32_t * pd,
1497 const uint32_t * ps,
1498 const uint32_t * pm,
1499 int w)
1500{
1501 uint32_t s, m;
1502
1503 __m128i xmm_src_lo, xmm_src_hi;
1504 __m128i xmm_mask_lo, xmm_mask_hi;
1505 __m128i xmm_dst_lo, xmm_dst_hi;
1506
1507 while (w && (uintptr_t)pd & 15)
1508 {
1509 s = *ps++;
1510 m = *pm++;
1511 *pd++ = pack_1x128_32 (
1512 pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
1513 w--;
1514 }
1515
1516 while (w >= 4)
1517 {
1518 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1519 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1520
1521 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1522 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1523
1524 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1525 &xmm_mask_lo, &xmm_mask_hi,
1526 &xmm_dst_lo, &xmm_dst_hi);
1527
1528 save_128_aligned (
1529 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1530
1531 ps += 4;
1532 pd += 4;
1533 pm += 4;
1534 w -= 4;
1535 }
1536
1537 while (w)
1538 {
1539 s = *ps++;
1540 m = *pm++;
1541 *pd++ = pack_1x128_32 (
1542 pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
1543 w--;
1544 }
1545}
1546
1547static force_inline uint32_t
1548core_combine_over_ca_pixel_sse2 (uint32_t src,
1549 uint32_t mask,
1550 uint32_t dst)
1551{
1552 __m128i s = unpack_32_1x128 (src);
1553 __m128i expAlpha = expand_alpha_1x128 (s);
1554 __m128i unpk_mask = unpack_32_1x128 (mask);
1555 __m128i unpk_dst = unpack_32_1x128 (dst);
1556
1557 return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst));
1558}
1559
1560static void
1561sse2_combine_over_ca (pixman_implementation_t *imp,
1562 pixman_op_t op,
1563 uint32_t * pd,
1564 const uint32_t * ps,
1565 const uint32_t * pm,
1566 int w)
1567{
1568 uint32_t s, m, d;
1569
1570 __m128i xmm_alpha_lo, xmm_alpha_hi;
1571 __m128i xmm_src_lo, xmm_src_hi;
1572 __m128i xmm_dst_lo, xmm_dst_hi;
1573 __m128i xmm_mask_lo, xmm_mask_hi;
1574
1575 while (w && (uintptr_t)pd & 15)
1576 {
1577 s = *ps++;
1578 m = *pm++;
1579 d = *pd;
1580
1581 *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
1582 w--;
1583 }
1584
1585 while (w >= 4)
1586 {
1587 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1588 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1589 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1590
1591 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1592 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1593 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1594
1595 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1596 &xmm_alpha_lo, &xmm_alpha_hi);
1597
1598 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
1599 &xmm_alpha_lo, &xmm_alpha_hi,
1600 &xmm_mask_lo, &xmm_mask_hi,
1601 &xmm_dst_lo, &xmm_dst_hi);
1602
1603 save_128_aligned (
1604 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1605
1606 ps += 4;
1607 pd += 4;
1608 pm += 4;
1609 w -= 4;
1610 }
1611
1612 while (w)
1613 {
1614 s = *ps++;
1615 m = *pm++;
1616 d = *pd;
1617
1618 *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
1619 w--;
1620 }
1621}
1622
1623static force_inline uint32_t
1624core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
1625 uint32_t mask,
1626 uint32_t dst)
1627{
1628 __m128i d = unpack_32_1x128 (dst);
1629
1630 return pack_1x128_32 (
1631 over_1x128 (d, expand_alpha_1x128 (d),
1632 pix_multiply_1x128 (unpack_32_1x128 (src),
1633 unpack_32_1x128 (mask))));
1634}
1635
1636static void
1637sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
1638 pixman_op_t op,
1639 uint32_t * pd,
1640 const uint32_t * ps,
1641 const uint32_t * pm,
1642 int w)
1643{
1644 uint32_t s, m, d;
1645
1646 __m128i xmm_alpha_lo, xmm_alpha_hi;
1647 __m128i xmm_src_lo, xmm_src_hi;
1648 __m128i xmm_dst_lo, xmm_dst_hi;
1649 __m128i xmm_mask_lo, xmm_mask_hi;
1650
1651 while (w && (uintptr_t)pd & 15)
1652 {
1653 s = *ps++;
1654 m = *pm++;
1655 d = *pd;
1656
1657 *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
1658 w--;
1659 }
1660
1661 while (w >= 4)
1662 {
1663 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1664 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1665 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1666
1667 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1668 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1669 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1670
1671 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1672 &xmm_alpha_lo, &xmm_alpha_hi);
1673 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1674 &xmm_mask_lo, &xmm_mask_hi,
1675 &xmm_mask_lo, &xmm_mask_hi);
1676
1677 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1678 &xmm_alpha_lo, &xmm_alpha_hi,
1679 &xmm_mask_lo, &xmm_mask_hi);
1680
1681 save_128_aligned (
1682 (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
1683
1684 ps += 4;
1685 pd += 4;
1686 pm += 4;
1687 w -= 4;
1688 }
1689
1690 while (w)
1691 {
1692 s = *ps++;
1693 m = *pm++;
1694 d = *pd;
1695
1696 *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
1697 w--;
1698 }
1699}
1700
1701static void
1702sse2_combine_in_ca (pixman_implementation_t *imp,
1703 pixman_op_t op,
1704 uint32_t * pd,
1705 const uint32_t * ps,
1706 const uint32_t * pm,
1707 int w)
1708{
1709 uint32_t s, m, d;
1710
1711 __m128i xmm_alpha_lo, xmm_alpha_hi;
1712 __m128i xmm_src_lo, xmm_src_hi;
1713 __m128i xmm_dst_lo, xmm_dst_hi;
1714 __m128i xmm_mask_lo, xmm_mask_hi;
1715
1716 while (w && (uintptr_t)pd & 15)
1717 {
1718 s = *ps++;
1719 m = *pm++;
1720 d = *pd;
1721
1722 *pd++ = pack_1x128_32 (
1723 pix_multiply_1x128 (
1724 pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)),
1725 expand_alpha_1x128 (unpack_32_1x128 (d))));
1726
1727 w--;
1728 }
1729
1730 while (w >= 4)
1731 {
1732 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1733 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1734 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1735
1736 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1737 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1738 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1739
1740 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1741 &xmm_alpha_lo, &xmm_alpha_hi);
1742
1743 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1744 &xmm_mask_lo, &xmm_mask_hi,
1745 &xmm_dst_lo, &xmm_dst_hi);
1746
1747 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1748 &xmm_alpha_lo, &xmm_alpha_hi,
1749 &xmm_dst_lo, &xmm_dst_hi);
1750
1751 save_128_aligned (
1752 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1753
1754 ps += 4;
1755 pd += 4;
1756 pm += 4;
1757 w -= 4;
1758 }
1759
1760 while (w)
1761 {
1762 s = *ps++;
1763 m = *pm++;
1764 d = *pd;
1765
1766 *pd++ = pack_1x128_32 (
1767 pix_multiply_1x128 (
1768 pix_multiply_1x128 (
1769 unpack_32_1x128 (s), unpack_32_1x128 (m)),
1770 expand_alpha_1x128 (unpack_32_1x128 (d))));
1771
1772 w--;
1773 }
1774}
1775
1776static void
1777sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
1778 pixman_op_t op,
1779 uint32_t * pd,
1780 const uint32_t * ps,
1781 const uint32_t * pm,
1782 int w)
1783{
1784 uint32_t s, m, d;
1785
1786 __m128i xmm_alpha_lo, xmm_alpha_hi;
1787 __m128i xmm_src_lo, xmm_src_hi;
1788 __m128i xmm_dst_lo, xmm_dst_hi;
1789 __m128i xmm_mask_lo, xmm_mask_hi;
1790
1791 while (w && (uintptr_t)pd & 15)
1792 {
1793 s = *ps++;
1794 m = *pm++;
1795 d = *pd;
1796
1797 *pd++ = pack_1x128_32 (
1798 pix_multiply_1x128 (
1799 unpack_32_1x128 (d),
1800 pix_multiply_1x128 (unpack_32_1x128 (m),
1801 expand_alpha_1x128 (unpack_32_1x128 (s)))));
1802 w--;
1803 }
1804
1805 while (w >= 4)
1806 {
1807 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1808 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1809 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1810
1811 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1812 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1813 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1814
1815 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1816 &xmm_alpha_lo, &xmm_alpha_hi);
1817 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
1818 &xmm_alpha_lo, &xmm_alpha_hi,
1819 &xmm_alpha_lo, &xmm_alpha_hi);
1820
1821 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1822 &xmm_alpha_lo, &xmm_alpha_hi,
1823 &xmm_dst_lo, &xmm_dst_hi);
1824
1825 save_128_aligned (
1826 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1827
1828 ps += 4;
1829 pd += 4;
1830 pm += 4;
1831 w -= 4;
1832 }
1833
1834 while (w)
1835 {
1836 s = *ps++;
1837 m = *pm++;
1838 d = *pd;
1839
1840 *pd++ = pack_1x128_32 (
1841 pix_multiply_1x128 (
1842 unpack_32_1x128 (d),
1843 pix_multiply_1x128 (unpack_32_1x128 (m),
1844 expand_alpha_1x128 (unpack_32_1x128 (s)))));
1845 w--;
1846 }
1847}
1848
1849static void
1850sse2_combine_out_ca (pixman_implementation_t *imp,
1851 pixman_op_t op,
1852 uint32_t * pd,
1853 const uint32_t * ps,
1854 const uint32_t * pm,
1855 int w)
1856{
1857 uint32_t s, m, d;
1858
1859 __m128i xmm_alpha_lo, xmm_alpha_hi;
1860 __m128i xmm_src_lo, xmm_src_hi;
1861 __m128i xmm_dst_lo, xmm_dst_hi;
1862 __m128i xmm_mask_lo, xmm_mask_hi;
1863
1864 while (w && (uintptr_t)pd & 15)
1865 {
1866 s = *ps++;
1867 m = *pm++;
1868 d = *pd;
1869
1870 *pd++ = pack_1x128_32 (
1871 pix_multiply_1x128 (
1872 pix_multiply_1x128 (
1873 unpack_32_1x128 (s), unpack_32_1x128 (m)),
1874 negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
1875 w--;
1876 }
1877
1878 while (w >= 4)
1879 {
1880 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1881 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1882 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1883
1884 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1885 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1886 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1887
1888 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1889 &xmm_alpha_lo, &xmm_alpha_hi);
1890 negate_2x128 (xmm_alpha_lo, xmm_alpha_hi,
1891 &xmm_alpha_lo, &xmm_alpha_hi);
1892
1893 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1894 &xmm_mask_lo, &xmm_mask_hi,
1895 &xmm_dst_lo, &xmm_dst_hi);
1896 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1897 &xmm_alpha_lo, &xmm_alpha_hi,
1898 &xmm_dst_lo, &xmm_dst_hi);
1899
1900 save_128_aligned (
1901 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1902
1903 ps += 4;
1904 pd += 4;
1905 pm += 4;
1906 w -= 4;
1907 }
1908
1909 while (w)
1910 {
1911 s = *ps++;
1912 m = *pm++;
1913 d = *pd;
1914
1915 *pd++ = pack_1x128_32 (
1916 pix_multiply_1x128 (
1917 pix_multiply_1x128 (
1918 unpack_32_1x128 (s), unpack_32_1x128 (m)),
1919 negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
1920
1921 w--;
1922 }
1923}
1924
1925static void
1926sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
1927 pixman_op_t op,
1928 uint32_t * pd,
1929 const uint32_t * ps,
1930 const uint32_t * pm,
1931 int w)
1932{
1933 uint32_t s, m, d;
1934
1935 __m128i xmm_alpha_lo, xmm_alpha_hi;
1936 __m128i xmm_src_lo, xmm_src_hi;
1937 __m128i xmm_dst_lo, xmm_dst_hi;
1938 __m128i xmm_mask_lo, xmm_mask_hi;
1939
1940 while (w && (uintptr_t)pd & 15)
1941 {
1942 s = *ps++;
1943 m = *pm++;
1944 d = *pd;
1945
1946 *pd++ = pack_1x128_32 (
1947 pix_multiply_1x128 (
1948 unpack_32_1x128 (d),
1949 negate_1x128 (pix_multiply_1x128 (
1950 unpack_32_1x128 (m),
1951 expand_alpha_1x128 (unpack_32_1x128 (s))))));
1952 w--;
1953 }
1954
1955 while (w >= 4)
1956 {
1957 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1958 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1959 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1960
1961 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1962 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1963 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1964
1965 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1966 &xmm_alpha_lo, &xmm_alpha_hi);
1967
1968 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
1969 &xmm_alpha_lo, &xmm_alpha_hi,
1970 &xmm_mask_lo, &xmm_mask_hi);
1971
1972 negate_2x128 (xmm_mask_lo, xmm_mask_hi,
1973 &xmm_mask_lo, &xmm_mask_hi);
1974
1975 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1976 &xmm_mask_lo, &xmm_mask_hi,
1977 &xmm_dst_lo, &xmm_dst_hi);
1978
1979 save_128_aligned (
1980 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1981
1982 ps += 4;
1983 pd += 4;
1984 pm += 4;
1985 w -= 4;
1986 }
1987
1988 while (w)
1989 {
1990 s = *ps++;
1991 m = *pm++;
1992 d = *pd;
1993
1994 *pd++ = pack_1x128_32 (
1995 pix_multiply_1x128 (
1996 unpack_32_1x128 (d),
1997 negate_1x128 (pix_multiply_1x128 (
1998 unpack_32_1x128 (m),
1999 expand_alpha_1x128 (unpack_32_1x128 (s))))));
2000 w--;
2001 }
2002}
2003
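/* ATOP with a component-alpha mask, per channel:
 *   dst = src * mask * alpha(dst) + dst * (1 - mask * alpha(src))
 * The helper below folds both products into a single pix_add_multiply
 * step.
 */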
2004static force_inline uint32_t
2005core_combine_atop_ca_pixel_sse2 (uint32_t src,
2006 uint32_t mask,
2007 uint32_t dst)
2008{
2009 __m128i m = unpack_32_1x128 (mask);
2010 __m128i s = unpack_32_1x128 (src);
2011 __m128i d = unpack_32_1x128 (dst);
2012 __m128i sa = expand_alpha_1x128 (s);
2013 __m128i da = expand_alpha_1x128 (d);
2014
2015 s = pix_multiply_1x128 (s, m);
2016 m = negate_1x128 (pix_multiply_1x128 (m, sa));
2017
2018 return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
2019}
2020
2021static void
2022sse2_combine_atop_ca (pixman_implementation_t *imp,
2023 pixman_op_t op,
2024 uint32_t * pd,
2025 const uint32_t * ps,
2026 const uint32_t * pm,
2027 int w)
2028{
2029 uint32_t s, m, d;
2030
2031 __m128i xmm_src_lo, xmm_src_hi;
2032 __m128i xmm_dst_lo, xmm_dst_hi;
2033 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
2034 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
2035 __m128i xmm_mask_lo, xmm_mask_hi;
2036
2037 while (w && (uintptr_t)pd & 15)
2038 {
2039 s = *ps++;
2040 m = *pm++;
2041 d = *pd;
2042
2043 *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
2044 w--;
2045 }
2046
2047 while (w >= 4)
2048 {
2049 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2050 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2051 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2052
2053 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2054 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2055 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2056
2057 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2058 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
2059 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
2060 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2061
2062 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2063 &xmm_mask_lo, &xmm_mask_hi,
2064 &xmm_src_lo, &xmm_src_hi);
2065 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
2066 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
2067 &xmm_mask_lo, &xmm_mask_hi);
2068
2069 negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2070
2071 pix_add_multiply_2x128 (
2072 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
2073 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2074 &xmm_dst_lo, &xmm_dst_hi);
2075
2076 save_128_aligned (
2077 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2078
2079 ps += 4;
2080 pd += 4;
2081 pm += 4;
2082 w -= 4;
2083 }
2084
2085 while (w)
2086 {
2087 s = *ps++;
2088 m = *pm++;
2089 d = *pd;
2090
2091 *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
2092 w--;
2093 }
2094}
2095
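/* ATOP_REVERSE with a component-alpha mask, per channel:
 *   dst = src * mask * (1 - alpha(dst)) + dst * mask * alpha(src)
 */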
2096static force_inline uint32_t
2097core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
2098 uint32_t mask,
2099 uint32_t dst)
2100{
2101 __m128i m = unpack_32_1x128 (mask);
2102 __m128i s = unpack_32_1x128 (src);
2103 __m128i d = unpack_32_1x128 (dst);
2104
2105 __m128i da = negate_1x128 (expand_alpha_1x128 (d));
2106 __m128i sa = expand_alpha_1x128 (s);
2107
2108 s = pix_multiply_1x128 (s, m);
2109 m = pix_multiply_1x128 (m, sa);
2110
2111 return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
2112}
2113
2114static void
2115sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
2116 pixman_op_t op,
2117 uint32_t * pd,
2118 const uint32_t * ps,
2119 const uint32_t * pm,
2120 int w)
2121{
2122 uint32_t s, m, d;
2123
2124 __m128i xmm_src_lo, xmm_src_hi;
2125 __m128i xmm_dst_lo, xmm_dst_hi;
2126 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
2127 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
2128 __m128i xmm_mask_lo, xmm_mask_hi;
2129
2130 while (w && (uintptr_t)pd & 15)
2131 {
2132 s = *ps++;
2133 m = *pm++;
2134 d = *pd;
2135
2136 *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
2137 w--;
2138 }
2139
2140 while (w >= 4)
2141 {
2142 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2143 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2144 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2145
2146 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2147 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2148 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2149
2150 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2151 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
2152 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
2153 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2154
2155 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2156 &xmm_mask_lo, &xmm_mask_hi,
2157 &xmm_src_lo, &xmm_src_hi);
2158 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
2159 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
2160 &xmm_mask_lo, &xmm_mask_hi);
2161
2162 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
2163 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2164
2165 pix_add_multiply_2x128 (
2166 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
2167 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2168 &xmm_dst_lo, &xmm_dst_hi);
2169
2170 save_128_aligned (
2171 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2172
2173 ps += 4;
2174 pd += 4;
2175 pm += 4;
2176 w -= 4;
2177 }
2178
2179 while (w)
2180 {
2181 s = *ps++;
2182 m = *pm++;
2183 d = *pd;
2184
2185 *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
2186 w--;
2187 }
2188}
2189
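/* XOR with a component-alpha mask, per channel:
 *   dst = src * mask * (1 - alpha(dst)) + dst * (1 - mask * alpha(src))
 */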
2190static force_inline uint32_t
2191core_combine_xor_ca_pixel_sse2 (uint32_t src,
2192 uint32_t mask,
2193 uint32_t dst)
2194{
2195 __m128i a = unpack_32_1x128 (mask);
2196 __m128i s = unpack_32_1x128 (src);
2197 __m128i d = unpack_32_1x128 (dst);
2198
2199 __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 (
2200 a, expand_alpha_1x128 (s)));
2201 __m128i dest = pix_multiply_1x128 (s, a);
2202 __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d));
2203
2204 return pack_1x128_32 (pix_add_multiply_1x128 (&d,
2205 &alpha_dst,
2206 &dest,
2207 &alpha_src));
2208}
2209
2210static void
2211sse2_combine_xor_ca (pixman_implementation_t *imp,
2212 pixman_op_t op,
2213 uint32_t * pd,
2214 const uint32_t * ps,
2215 const uint32_t * pm,
2216 int w)
2217{
2218 uint32_t s, m, d;
2219
2220 __m128i xmm_src_lo, xmm_src_hi;
2221 __m128i xmm_dst_lo, xmm_dst_hi;
2222 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
2223 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
2224 __m128i xmm_mask_lo, xmm_mask_hi;
2225
2226 while (w && (uintptr_t)pd & 15)
2227 {
2228 s = *ps++;
2229 m = *pm++;
2230 d = *pd;
2231
2232 *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
2233 w--;
2234 }
2235
2236 while (w >= 4)
2237 {
2238 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2239 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2240 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2241
2242 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2243 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2244 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2245
2246 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2247 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
2248 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
2249 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2250
2251 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2252 &xmm_mask_lo, &xmm_mask_hi,
2253 &xmm_src_lo, &xmm_src_hi);
2254 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
2255 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
2256 &xmm_mask_lo, &xmm_mask_hi);
2257
2258 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
2259 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2260 negate_2x128 (xmm_mask_lo, xmm_mask_hi,
2261 &xmm_mask_lo, &xmm_mask_hi);
2262
2263 pix_add_multiply_2x128 (
2264 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
2265 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2266 &xmm_dst_lo, &xmm_dst_hi);
2267
2268 save_128_aligned (
2269 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2270
2271 ps += 4;
2272 pd += 4;
2273 pm += 4;
2274 w -= 4;
2275 }
2276
2277 while (w)
2278 {
2279 s = *ps++;
2280 m = *pm++;
2281 d = *pd;
2282
2283 *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
2284 w--;
2285 }
2286}
2287
2288static void
2289sse2_combine_add_ca (pixman_implementation_t *imp,
2290 pixman_op_t op,
2291 uint32_t * pd,
2292 const uint32_t * ps,
2293 const uint32_t * pm,
2294 int w)
2295{
2296 uint32_t s, m, d;
2297
2298 __m128i xmm_src_lo, xmm_src_hi;
2299 __m128i xmm_dst_lo, xmm_dst_hi;
2300 __m128i xmm_mask_lo, xmm_mask_hi;
2301
2302 while (w && (uintptr_t)pd & 15)
2303 {
2304 s = *ps++;
2305 m = *pm++;
2306 d = *pd;
2307
2308 *pd++ = pack_1x128_32 (
2309 _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
2310 unpack_32_1x128 (m)),
2311 unpack_32_1x128 (d)));
2312 w--;
2313 }
2314
2315 while (w >= 4)
2316 {
2317 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2318 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2319 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2320
2321 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2322 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2323 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2324
2325 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2326 &xmm_mask_lo, &xmm_mask_hi,
2327 &xmm_src_lo, &xmm_src_hi);
2328
2329 save_128_aligned (
2330 (__m128i*)pd, pack_2x128_128 (
2331 _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
2332 _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
2333
2334 ps += 4;
2335 pd += 4;
2336 pm += 4;
2337 w -= 4;
2338 }
2339
2340 while (w)
2341 {
2342 s = *ps++;
2343 m = *pm++;
2344 d = *pd;
2345
2346 *pd++ = pack_1x128_32 (
2347 _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
2348 unpack_32_1x128 (m)),
2349 unpack_32_1x128 (d)));
2350 w--;
2351 }
2352}
2353
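/* Broadcast a 16-bit value into all eight lanes of an XMM register. */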
2354static force_inline __m128i
2355create_mask_16_128 (uint16_t mask)
2356{
2357 return _mm_set1_epi16 (mask);
2358}
2359
2360/* Work around a code generation bug in Sun Studio 12. */
2361#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
2362# define create_mask_2x32_128(mask0, mask1) \
2363 (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
2364#else
2365static force_inline __m128i
2366create_mask_2x32_128 (uint32_t mask0,
2367 uint32_t mask1)
2368{
2369 return _mm_set_epi32 (mask0, mask1, mask0, mask1);
2370}
2371#endif
2372
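/* Solid-color OVER onto a8r8g8b8. Like the other fast paths below, each
 * scanline is processed in three phases: a scalar head loop until dst
 * reaches 16-byte alignment, a wide SSE2 body, and a scalar tail for the
 * remaining pixels.
 */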
2373static void
2374sse2_composite_over_n_8888 (pixman_implementation_t *imp,
2375 pixman_composite_info_t *info)
2376{
2377 PIXMAN_COMPOSITE_ARGS (info);
2378 uint32_t src;
2379 uint32_t *dst_line, *dst, d;
2380 int32_t w;
2381 int dst_stride;
2382 __m128i xmm_src, xmm_alpha;
2383 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2384
2385 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2386
2387 if (src == 0)
2388 return;
2389
2390 PIXMAN_IMAGE_GET_LINE (
2391 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2392
2393 xmm_src = expand_pixel_32_1x128 (src);
2394 xmm_alpha = expand_alpha_1x128 (xmm_src);
2395
2396 while (height--)
2397 {
2398 dst = dst_line;
2399
2400 dst_line += dst_stride;
2401 w = width;
2402
2403 while (w && (uintptr_t)dst & 15)
2404 {
2405 d = *dst;
2406 *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
2407 xmm_alpha,
2408 unpack_32_1x128 (d)));
2409 w--;
2410 }
2411
2412 while (w >= 4)
2413 {
2414 xmm_dst = load_128_aligned ((__m128i*)dst);
2415
2416 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
2417
2418 over_2x128 (&xmm_src, &xmm_src,
2419 &xmm_alpha, &xmm_alpha,
2420 &xmm_dst_lo, &xmm_dst_hi);
2421
2422 /* rebuild the 4 pixel data and save */
2423 save_128_aligned (
2424 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2425
2426 w -= 4;
2427 dst += 4;
2428 }
2429
2430 while (w)
2431 {
2432 d = *dst;
2433 *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
2434 xmm_alpha,
2435 unpack_32_1x128 (d)));
2436 w--;
2437 }
2438
2439 }
2440}
2441
2442static void
2443sse2_composite_over_n_0565 (pixman_implementation_t *imp,
2444 pixman_composite_info_t *info)
2445{
2446 PIXMAN_COMPOSITE_ARGS (info);
2447 uint32_t src;
2448 uint16_t *dst_line, *dst, d;
2449 int32_t w;
2450 int dst_stride;
2451 __m128i xmm_src, xmm_alpha;
2452 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
2453
2454 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2455
2456 if (src == 0)
2457 return;
2458
2459 PIXMAN_IMAGE_GET_LINE (
2460 dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
2461
2462 xmm_src = expand_pixel_32_1x128 (src);
2463 xmm_alpha = expand_alpha_1x128 (xmm_src);
2464
2465 while (height--)
2466 {
2467 dst = dst_line;
2468
2469 dst_line += dst_stride;
2470 w = width;
2471
2472 while (w && (uintptr_t)dst & 15)
2473 {
2474 d = *dst;
2475
2476 *dst++ = pack_565_32_16 (
2477 pack_1x128_32 (over_1x128 (xmm_src,
2478 xmm_alpha,
2479 expand565_16_1x128 (d))));
2480 w--;
2481 }
2482
2483 while (w >= 8)
2484 {
2485 xmm_dst = load_128_aligned ((__m128i*)dst);
2486
2487 unpack_565_128_4x128 (xmm_dst,
2488 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
2489
2490 over_2x128 (&xmm_src, &xmm_src,
2491 &xmm_alpha, &xmm_alpha,
2492 &xmm_dst0, &xmm_dst1);
2493 over_2x128 (&xmm_src, &xmm_src,
2494 &xmm_alpha, &xmm_alpha,
2495 &xmm_dst2, &xmm_dst3);
2496
2497 xmm_dst = pack_565_4x128_128 (
2498 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
2499
2500 save_128_aligned ((__m128i*)dst, xmm_dst);
2501
2502 dst += 8;
2503 w -= 8;
2504 }
2505
2506 while (w--)
2507 {
2508 d = *dst;
2509 *dst++ = pack_565_32_16 (
2510 pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha,
2511 expand565_16_1x128 (d))));
2512 }
2513 }
2514
2515}
2516
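/* Solid-color ADD through a component-alpha mask. The 4-pixel loop
 * tests the whole mask block against zero (_mm_cmpeq_epi32 followed by
 * _mm_movemask_epi8) so that fully transparent blocks skip the
 * destination read-modify-write entirely.
 */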
2517static void
2518sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
2519 pixman_composite_info_t *info)
2520{
2521 PIXMAN_COMPOSITE_ARGS (info);
2522 uint32_t src;
2523 uint32_t *dst_line, d;
2524 uint32_t *mask_line, m;
2525 uint32_t pack_cmp;
2526 int dst_stride, mask_stride;
2527
2528 __m128i xmm_src;
2529 __m128i xmm_dst;
2530 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
2531
2532 __m128i mmx_src, mmx_mask, mmx_dest;
2533
2534 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2535
2536 if (src == 0)
2537 return;
2538
2539 PIXMAN_IMAGE_GET_LINE (
2540 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2541 PIXMAN_IMAGE_GET_LINE (
2542 mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
2543
2544 xmm_src = _mm_unpacklo_epi8 (
2545 create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
2546 mmx_src = xmm_src;
2547
2548 while (height--)
2549 {
2550 int w = width;
2551 const uint32_t *pm = (uint32_t *)mask_line;
2552 uint32_t *pd = (uint32_t *)dst_line;
2553
2554 dst_line += dst_stride;
2555 mask_line += mask_stride;
2556
2557 while (w && (uintptr_t)pd & 15)
2558 {
2559 m = *pm++;
2560
2561 if (m)
2562 {
2563 d = *pd;
2564
2565 mmx_mask = unpack_32_1x128 (m);
2566 mmx_dest = unpack_32_1x128 (d);
2567
2568 *pd = pack_1x128_32 (
2569 _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
2570 mmx_dest));
2571 }
2572
2573 pd++;
2574 w--;
2575 }
2576
2577 while (w >= 4)
2578 {
2579 xmm_mask = load_128_unaligned ((__m128i*)pm);
2580
2581 pack_cmp =
2582 _mm_movemask_epi8 (
2583 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
2584
2585 /* if all bits in mask are zero, pack_cmp is equal to 0xffff */
2586 if (pack_cmp != 0xffff)
2587 {
2588 xmm_dst = load_128_aligned ((__m128i*)pd);
2589
2590 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
2591
2592 pix_multiply_2x128 (&xmm_src, &xmm_src,
2593 &xmm_mask_lo, &xmm_mask_hi,
2594 &xmm_mask_lo, &xmm_mask_hi);
2595 xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi);
2596
2597 save_128_aligned (
2598 (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst));
2599 }
2600
2601 pd += 4;
2602 pm += 4;
2603 w -= 4;
2604 }
2605
2606 while (w)
2607 {
2608 m = *pm++;
2609
2610 if (m)
2611 {
2612 d = *pd;
2613
2614 mmx_mask = unpack_32_1x128 (m);
2615 mmx_dest = unpack_32_1x128 (d);
2616
2617 *pd = pack_1x128_32 (
2618 _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
2619 mmx_dest));
2620 }
2621
2622 pd++;
2623 w--;
2624 }
2625 }
2626
2627}
2628
2629static void
2630sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
2631 pixman_composite_info_t *info)
2632{
2633 PIXMAN_COMPOSITE_ARGS (info);
2634 uint32_t src;
2635 uint32_t *dst_line, d;
2636 uint32_t *mask_line, m;
2637 uint32_t pack_cmp;
2638 int dst_stride, mask_stride;
2639
2640 __m128i xmm_src, xmm_alpha;
2641 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2642 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
2643
2644 __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
2645
2646 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2647
2648 if (src == 0)
2649 return;
2650
2651 PIXMAN_IMAGE_GET_LINE (
2652 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2653 PIXMAN_IMAGE_GET_LINE (
2654 mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
2655
2656 xmm_src = _mm_unpacklo_epi8 (
2657 create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
2658 xmm_alpha = expand_alpha_1x128 (xmm_src);
2659 mmx_src = xmm_src;
2660 mmx_alpha = xmm_alpha;
2661
2662 while (height--)
2663 {
2664 int w = width;
2665 const uint32_t *pm = (uint32_t *)mask_line;
2666 uint32_t *pd = (uint32_t *)dst_line;
2667
2668 dst_line += dst_stride;
2669 mask_line += mask_stride;
2670
2671 while (w && (uintptr_t)pd & 15)
2672 {
2673 m = *pm++;
2674
2675 if (m)
2676 {
2677 d = *pd;
2678 mmx_mask = unpack_32_1x128 (m);
2679 mmx_dest = unpack_32_1x128 (d);
2680
2681 *pd = pack_1x128_32 (in_over_1x128 (&mmx_src,
2682 &mmx_alpha,
2683 &mmx_mask,
2684 &mmx_dest));
2685 }
2686
2687 pd++;
2688 w--;
2689 }
2690
2691 while (w >= 4)
2692 {
2693 xmm_mask = load_128_unaligned ((__m128i*)pm);
2694
2695 pack_cmp =
2696 _mm_movemask_epi8 (
2697 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
2698
2699 /* if all bits in mask are zero, pack_cmp is equal to 0xffff */
2700 if (pack_cmp != 0xffff)
2701 {
2702 xmm_dst = load_128_aligned ((__m128i*)pd);
2703
2704 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
2705 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
2706
2707 in_over_2x128 (&xmm_src, &xmm_src,
2708 &xmm_alpha, &xmm_alpha,
2709 &xmm_mask_lo, &xmm_mask_hi,
2710 &xmm_dst_lo, &xmm_dst_hi);
2711
2712 save_128_aligned (
2713 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2714 }
2715
2716 pd += 4;
2717 pm += 4;
2718 w -= 4;
2719 }
2720
2721 while (w)
2722 {
2723 m = *pm++;
2724
2725 if (m)
2726 {
2727 d = *pd;
2728 mmx_mask = unpack_32_1x128 (m);
2729 mmx_dest = unpack_32_1x128 (d);
2730
2731 *pd = pack_1x128_32 (
2732 in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
2733 }
2734
2735 pd++;
2736 w--;
2737 }
2738 }
2739
2740}
2741
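/* OVER of an a8r8g8b8 source modulated by the alpha of a solid mask
 * (mask >> 24); source blocks that are entirely zero are skipped via
 * is_zero ().
 */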
2742static void
2743sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
2744 pixman_composite_info_t *info)
2745{
2746 PIXMAN_COMPOSITE_ARGS (info);
2747 uint32_t *dst_line, *dst;
2748 uint32_t *src_line, *src;
2749 uint32_t mask;
2750 int32_t w;
2751 int dst_stride, src_stride;
2752
2753 __m128i xmm_mask;
2754 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
2755 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2756 __m128i xmm_alpha_lo, xmm_alpha_hi;
2757
2758 PIXMAN_IMAGE_GET_LINE (
2759 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2760 PIXMAN_IMAGE_GET_LINE (
2761 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
2762
2763 mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
2764
2765 xmm_mask = create_mask_16_128 (mask >> 24);
2766
2767 while (height--)
2768 {
2769 dst = dst_line;
2770 dst_line += dst_stride;
2771 src = src_line;
2772 src_line += src_stride;
2773 w = width;
2774
2775 while (w && (uintptr_t)dst & 15)
2776 {
2777 uint32_t s = *src++;
2778
2779 if (s)
2780 {
2781 uint32_t d = *dst;
2782
2783 __m128i ms = unpack_32_1x128 (s);
2784 __m128i alpha = expand_alpha_1x128 (ms);
2785 __m128i dest = xmm_mask;
2786 __m128i alpha_dst = unpack_32_1x128 (d);
2787
2788 *dst = pack_1x128_32 (
2789 in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
2790 }
2791 dst++;
2792 w--;
2793 }
2794
2795 while (w >= 4)
2796 {
2797 xmm_src = load_128_unaligned ((__m128i*)src);
2798
2799 if (!is_zero (xmm_src))
2800 {
2801 xmm_dst = load_128_aligned ((__m128i*)dst);
2802
2803 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
2804 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
2805 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2806 &xmm_alpha_lo, &xmm_alpha_hi);
2807
2808 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
2809 &xmm_alpha_lo, &xmm_alpha_hi,
2810 &xmm_mask, &xmm_mask,
2811 &xmm_dst_lo, &xmm_dst_hi);
2812
2813 save_128_aligned (
2814 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2815 }
2816
2817 dst += 4;
2818 src += 4;
2819 w -= 4;
2820 }
2821
2822 while (w)
2823 {
2824 uint32_t s = *src++;
2825
2826 if (s)
2827 {
2828 uint32_t d = *dst;
2829
2830 __m128i ms = unpack_32_1x128 (s);
2831 __m128i alpha = expand_alpha_1x128 (ms);
2832 __m128i mask = xmm_mask;
2833 __m128i dest = unpack_32_1x128 (d);
2834
2835 *dst = pack_1x128_32 (
2836 in_over_1x128 (&ms, &alpha, &mask, &dest));
2837 }
2838
2839 dst++;
2840 w--;
2841 }
2842 }
2843
2844}
2845
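/* SRC conversion from x8r8g8b8 to r5g6b5: the vector body packs two
 * unaligned 128-bit source loads (eight pixels) into one aligned 565
 * store.
 */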
2846static void
2847sse2_composite_src_x888_0565 (pixman_implementation_t *imp,
2848 pixman_composite_info_t *info)
2849{
2850 PIXMAN_COMPOSITE_ARGS (info);
2851 uint16_t *dst_line, *dst;
2852 uint32_t *src_line, *src, s;
2853 int dst_stride, src_stride;
2854 int32_t w;
2855
2856 PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
2857 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
2858
2859 while (height--)
2860 {
2861 dst = dst_line;
2862 dst_line += dst_stride;
2863 src = src_line;
2864 src_line += src_stride;
2865 w = width;
2866
2867 while (w && (uintptr_t)dst & 15)
2868 {
2869 s = *src++;
2870 *dst = convert_8888_to_0565 (s);
2871 dst++;
2872 w--;
2873 }
2874
2875 while (w >= 8)
2876 {
2877 __m128i xmm_src0 = load_128_unaligned ((__m128i *)src + 0);
2878 __m128i xmm_src1 = load_128_unaligned ((__m128i *)src + 1);
2879
2880 save_128_aligned ((__m128i*)dst, pack_565_2packedx128_128 (xmm_src0, xmm_src1));
2881
2882 w -= 8;
2883 src += 8;
2884 dst += 8;
2885 }
2886
2887 while (w)
2888 {
2889 s = *src++;
2890 *dst = convert_8888_to_0565 (s);
2891 dst++;
2892 w--;
2893 }
2894 }
2895}
2896
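/* SRC copy from x8r8g8b8 to a8r8g8b8: the undefined alpha byte is
 * forced to 0xff (scalar: s | 0xff000000; vector: OR with
 * mask_ff000000), sixteen pixels per vector iteration.
 */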
2897static void
2898sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
2899 pixman_composite_info_t *info)
2900{
2901 PIXMAN_COMPOSITE_ARGS (info);
2902 uint32_t *dst_line, *dst;
2903 uint32_t *src_line, *src;
2904 int32_t w;
2905 int dst_stride, src_stride;
2906
2907
2908 PIXMAN_IMAGE_GET_LINE (
2909 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2910 PIXMAN_IMAGE_GET_LINE (
2911 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
2912
2913 while (height--)
2914 {
2915 dst = dst_line;
2916 dst_line += dst_stride;
2917 src = src_line;
2918 src_line += src_stride;
2919 w = width;
2920
2921 while (w && (uintptr_t)dst & 15)
2922 {
2923 *dst++ = *src++ | 0xff000000;
2924 w--;
2925 }
2926
2927 while (w >= 16)
2928 {
2929 __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
2930
2931 xmm_src1 = load_128_unaligned ((__m128i*)src + 0);
2932 xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
2933 xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
2934 xmm_src4 = load_128_unaligned ((__m128i*)src + 3);
2935
2936 save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000));
2937 save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000));
2938 save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000));
2939 save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000));
2940
2941 dst += 16;
2942 src += 16;
2943 w -= 16;
2944 }
2945
2946 while (w)
2947 {
2948 *dst++ = *src++ | 0xff000000;
2949 w--;
2950 }
2951 }
2952
2953}
2954
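/* OVER of x8r8g8b8 under the alpha of a solid mask. Because the source
 * alpha byte is forced to 0xff first, the expanded source alpha is just
 * the constant mask_00ff.
 */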
2955static void
2956sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
2957 pixman_composite_info_t *info)
2958{
2959 PIXMAN_COMPOSITE_ARGS (info);
2960 uint32_t *dst_line, *dst;
2961 uint32_t *src_line, *src;
2962 uint32_t mask;
2963 int dst_stride, src_stride;
2964 int32_t w;
2965
2966 __m128i xmm_mask, xmm_alpha;
2967 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
2968 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2969
2970 PIXMAN_IMAGE_GET_LINE (
2971 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2972 PIXMAN_IMAGE_GET_LINE (
2973 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
2974
2975 mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
2976
2977 xmm_mask = create_mask_16_128 (mask >> 24);
2978 xmm_alpha = mask_00ff;
2979
2980 while (height--)
2981 {
2982 dst = dst_line;
2983 dst_line += dst_stride;
2984 src = src_line;
2985 src_line += src_stride;
2986 w = width;
2987
2988 while (w && (uintptr_t)dst & 15)
2989 {
2990 uint32_t s = (*src++) | 0xff000000;
2991 uint32_t d = *dst;
2992
2993 __m128i src = unpack_32_1x128 (s);
2994 __m128i alpha = xmm_alpha;
2995 __m128i mask = xmm_mask;
2996 __m128i dest = unpack_32_1x128 (d);
2997
2998 *dst++ = pack_1x128_32 (
2999 in_over_1x128 (&src, &alpha, &mask, &dest));
3000
3001 w--;
3002 }
3003
3004 while (w >= 4)
3005 {
3006 xmm_src = _mm_or_si128 (
3007 load_128_unaligned ((__m128i*)src), mask_ff000000);
3008 xmm_dst = load_128_aligned ((__m128i*)dst);
3009
3010 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3011 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
3012
3013 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
3014 &xmm_alpha, &xmm_alpha,
3015 &xmm_mask, &xmm_mask,
3016 &xmm_dst_lo, &xmm_dst_hi);
3017
3018 save_128_aligned (
3019 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3020
3021 dst += 4;
3022 src += 4;
3023 w -= 4;
3024
3025 }
3026
3027 while (w)
3028 {
3029 uint32_t s = (*src++) | 0xff000000;
3030 uint32_t d = *dst;
3031
3032 __m128i src = unpack_32_1x128 (s);
3033 __m128i alpha = xmm_alpha;
3034 __m128i mask = xmm_mask;
3035 __m128i dest = unpack_32_1x128 (d);
3036
3037 *dst++ = pack_1x128_32 (
3038 in_over_1x128 (&src, &alpha, &mask, &dest));
3039
3040 w--;
3041 }
3042 }
3043
3044}
3045
3046static void
3047sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
3048 pixman_composite_info_t *info)
3049{
3050 PIXMAN_COMPOSITE_ARGS (info);
3051 int dst_stride, src_stride;
3052 uint32_t *dst_line, *dst;
3053 uint32_t *src_line, *src;
3054
3055 PIXMAN_IMAGE_GET_LINE (
3056 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3057 PIXMAN_IMAGE_GET_LINE (
3058 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3059
3060 dst = dst_line;
3061 src = src_line;
3062
3063 while (height--)
3064 {
3065 sse2_combine_over_u (imp, op, dst, src, NULL, width);
3066
3067 dst += dst_stride;
3068 src += src_stride;
3069 }
3070}
3071
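/* Blend one a8r8g8b8 pixel OVER one r5g6b5 pixel: widen the 565
 * destination to 8888, apply over_1x128, then pack back down to 565.
 */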
3072static force_inline uint16_t
3073composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
3074{
3075 __m128i ms;
3076
3077 ms = unpack_32_1x128 (src);
3078 return pack_565_32_16 (
3079 pack_1x128_32 (
3080 over_1x128 (
3081 ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst))));
3082}
3083
3084static void
3085sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
3086 pixman_composite_info_t *info)
3087{
3088 PIXMAN_COMPOSITE_ARGS (info);
3089 uint16_t *dst_line, *dst, d;
3090 uint32_t *src_line, *src, s;
3091 int dst_stride, src_stride;
3092 int32_t w;
3093
3094 __m128i xmm_alpha_lo, xmm_alpha_hi;
3095 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
3096 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3097
3098 PIXMAN_IMAGE_GET_LINE (
3099 dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
3100 PIXMAN_IMAGE_GET_LINE (
3101 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3102
3103 while (height--)
3104 {
3105 dst = dst_line;
3106 src = src_line;
3107
3108 dst_line += dst_stride;
3109 src_line += src_stride;
3110 w = width;
3111
3112 /* Align dst on a 16-byte boundary */
3113 while (w &&
3114 ((uintptr_t)dst & 15))
3115 {
3116 s = *src++;
3117 d = *dst;
3118
3119 *dst++ = composite_over_8888_0565pixel (s, d);
3120 w--;
3121 }
3122
3123 /* It's an 8-pixel loop */
3124 while (w >= 8)
3125 {
3126 /* I'm loading unaligned because I'm not sure
3127 * about the address alignment.
3128 */
3129 xmm_src = load_128_unaligned ((__m128i*) src);
3130 xmm_dst = load_128_aligned ((__m128i*) dst);
3131
3132 /* Unpacking */
3133 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3134 unpack_565_128_4x128 (xmm_dst,
3135 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3136 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
3137 &xmm_alpha_lo, &xmm_alpha_hi);
3138
3139 /* I'm loading the next 4 pixels from memory
3140 * ahead of time to optimize the memory read.
3141 */
3142 xmm_src = load_128_unaligned ((__m128i*) (src + 4));
3143
3144 over_2x128 (&xmm_src_lo, &xmm_src_hi,
3145 &xmm_alpha_lo, &xmm_alpha_hi,
3146 &xmm_dst0, &xmm_dst1);
3147
3148 /* Unpacking */
3149 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3150 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
3151 &xmm_alpha_lo, &xmm_alpha_hi);
3152
3153 over_2x128 (&xmm_src_lo, &xmm_src_hi,
3154 &xmm_alpha_lo, &xmm_alpha_hi,
3155 &xmm_dst2, &xmm_dst3);
3156
3157 save_128_aligned (
3158 (__m128i*)dst, pack_565_4x128_128 (
3159 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
3160
3161 w -= 8;
3162 dst += 8;
3163 src += 8;
3164 }
3165
3166 while (w--)
3167 {
3168 s = *src++;
3169 d = *dst;
3170
3171 *dst++ = composite_over_8888_0565pixel (s, d);
3172 }
3173 }
3174
3175}
3176
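/* Solid-color OVER through an a8 mask. Four mask bytes are fetched at a
 * time via memcpy (keeping the access alignment- and aliasing-safe);
 * m == 0 skips the block, and m == 0xffffffff with an opaque source
 * stores the pre-built solid word directly.
 */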
3177static void
3178sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
3179 pixman_composite_info_t *info)
3180{
3181 PIXMAN_COMPOSITE_ARGS (info);
3182 uint32_t src, srca;
3183 uint32_t *dst_line, *dst;
3184 uint8_t *mask_line, *mask;
3185 int dst_stride, mask_stride;
3186 int32_t w;
3187 uint32_t d;
3188
3189 __m128i xmm_src, xmm_alpha, xmm_def;
3190 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
3191 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3192
3193 __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
3194
3195 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
3196
3197 srca = src >> 24;
3198 if (src == 0)
3199 return;
3200
3201 PIXMAN_IMAGE_GET_LINE (
3202 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3203 PIXMAN_IMAGE_GET_LINE (
3204 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
3205
3206 xmm_def = create_mask_2x32_128 (src, src);
3207 xmm_src = expand_pixel_32_1x128 (src);
3208 xmm_alpha = expand_alpha_1x128 (xmm_src);
3209 mmx_src = xmm_src;
3210 mmx_alpha = xmm_alpha;
3211
3212 while (height--)
3213 {
3214 dst = dst_line;
3215 dst_line += dst_stride;
3216 mask = mask_line;
3217 mask_line += mask_stride;
3218 w = width;
3219
3220 while (w && (uintptr_t)dst & 15)
3221 {
3222 uint8_t m = *mask++;
3223
3224 if (m)
3225 {
3226 d = *dst;
3227 mmx_mask = expand_pixel_8_1x128 (m);
3228 mmx_dest = unpack_32_1x128 (d);
3229
3230 *dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
3231 &mmx_alpha,
3232 &mmx_mask,
3233 &mmx_dest));
3234 }
3235
3236 w--;
3237 dst++;
3238 }
3239
3240 while (w >= 4)
3241 {
3242 uint32_t m;
3243 memcpy(&m, mask, sizeof(uint32_t));
3244
3245 if (srca == 0xff && m == 0xffffffff)
3246 {
3247 save_128_aligned ((__m128i*)dst, xmm_def);
3248 }
3249 else if (m)
3250 {
3251 xmm_dst = load_128_aligned ((__m128i*) dst);
3252 xmm_mask = unpack_32_1x128 (m);
3253 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3254
3255 /* Unpacking */
3256 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
3257 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3258
3259 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3260 &xmm_mask_lo, &xmm_mask_hi);
3261
3262 in_over_2x128 (&xmm_src, &xmm_src,
3263 &xmm_alpha, &xmm_alpha,
3264 &xmm_mask_lo, &xmm_mask_hi,
3265 &xmm_dst_lo, &xmm_dst_hi);
3266
3267 save_128_aligned (
3268 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3269 }
3270
3271 w -= 4;
3272 dst += 4;
3273 mask += 4;
3274 }
3275
3276 while (w)
3277 {
3278 uint8_t m = *mask++;
3279
3280 if (m)
3281 {
3282 d = *dst;
3283 mmx_mask = expand_pixel_8_1x128 (m);
3284 mmx_dest = unpack_32_1x128 (d);
3285
3286 *dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
3287 &mmx_alpha,
3288 &mmx_mask,
3289 &mmx_dest));
3290 }
3291
3292 w--;
3293 dst++;
3294 }
3295 }
3296
3297}
3298
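/* memset-style solid fill for 8, 16 and 32 bpp. The filler is first
 * replicated into a full 32-bit word, the destination is aligned in
 * 1/2/4-byte steps up to a 16-byte boundary, and the bulk is then
 * written in descending chunks of 128, 64, 32 and 16 bytes.
 */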
3299#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
3300__attribute__((__force_align_arg_pointer__))
3301#endif
3302static pixman_bool_t
3303sse2_fill (pixman_implementation_t *imp,
3304 uint32_t * bits,
3305 int stride,
3306 int bpp,
3307 int x,
3308 int y,
3309 int width,
3310 int height,
3311 uint32_t filler)
3312{
3313 uint32_t byte_width;
3314 uint8_t *byte_line;
3315
3316 __m128i xmm_def;
3317
3318 if (bpp == 8)
3319 {
3320 uint32_t b;
3321 uint32_t w;
3322
3323 stride = stride * (int) sizeof (uint32_t) / 1;
3324 byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
3325 byte_width = width;
3326 stride *= 1;
3327
3328 b = filler & 0xff;
3329 w = (b << 8) | b;
3330 filler = (w << 16) | w;
3331 }
3332 else if (bpp == 16)
3333 {
3334 stride = stride * (int) sizeof (uint32_t) / 2;
3335 byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
3336 byte_width = 2 * width;
3337 stride *= 2;
3338
3339 filler = (filler & 0xffff) * 0x00010001;
3340 }
3341 else if (bpp == 32)
3342 {
3343 stride = stride * (int) sizeof (uint32_t) / 4;
3344 byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
3345 byte_width = 4 * width;
3346 stride *= 4;
3347 }
3348 else
3349 {
3350 return FALSE;
3351 }
3352
3353 xmm_def = create_mask_2x32_128 (filler, filler);
3354
3355 while (height--)
3356 {
3357 int w;
3358 uint8_t *d = byte_line;
3359 byte_line += stride;
3360 w = byte_width;
3361
3362 if (w >= 1 && ((uintptr_t)d & 1))
3363 {
3364 *(uint8_t *)d = filler;
3365 w -= 1;
3366 d += 1;
3367 }
3368
3369 while (w >= 2 && ((uintptr_t)d & 3))
3370 {
3371 *(uint16_t *)d = filler;
3372 w -= 2;
3373 d += 2;
3374 }
3375
3376 while (w >= 4 && ((uintptr_t)d & 15))
3377 {
3378 *(uint32_t *)d = filler;
3379
3380 w -= 4;
3381 d += 4;
3382 }
3383
3384 while (w >= 128)
3385 {
3386 save_128_aligned ((__m128i*)(d), xmm_def);
3387 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3388 save_128_aligned ((__m128i*)(d + 32), xmm_def);
3389 save_128_aligned ((__m128i*)(d + 48), xmm_def);
3390 save_128_aligned ((__m128i*)(d + 64), xmm_def);
3391 save_128_aligned ((__m128i*)(d + 80), xmm_def);
3392 save_128_aligned ((__m128i*)(d + 96), xmm_def);
3393 save_128_aligned ((__m128i*)(d + 112), xmm_def);
3394
3395 d += 128;
3396 w -= 128;
3397 }
3398
3399 if (w >= 64)
3400 {
3401 save_128_aligned ((__m128i*)(d), xmm_def);
3402 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3403 save_128_aligned ((__m128i*)(d + 32), xmm_def);
3404 save_128_aligned ((__m128i*)(d + 48), xmm_def);
3405
3406 d += 64;
3407 w -= 64;
3408 }
3409
3410 if (w >= 32)
3411 {
3412 save_128_aligned ((__m128i*)(d), xmm_def);
3413 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3414
3415 d += 32;
3416 w -= 32;
3417 }
3418
3419 if (w >= 16)
3420 {
3421 save_128_aligned ((__m128i*)(d), xmm_def);
3422
3423 d += 16;
3424 w -= 16;
3425 }
3426
3427 while (w >= 4)
3428 {
3429 *(uint32_t *)d = filler;
3430
3431 w -= 4;
3432 d += 4;
3433 }
3434
3435 if (w >= 2)
3436 {
3437 *(uint16_t *)d = filler;
3438 w -= 2;
3439 d += 2;
3440 }
3441
3442 if (w >= 1)
3443 {
3444 *(uint8_t *)d = filler;
3445 w -= 1;
3446 d += 1;
3447 }
3448 }
3449
3450 return TRUE;
3451}
3452
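/* SRC of a solid color through an a8 mask: dst = src * mask, with
 * zero-mask pixels written as zero. A fully transparent source is
 * handled up front by clearing the rectangle with sse2_fill.
 */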
3453static void
3454sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
3455 pixman_composite_info_t *info)
3456{
3457 PIXMAN_COMPOSITE_ARGS (info);
3458 uint32_t src, srca;
3459 uint32_t *dst_line, *dst;
3460 uint8_t *mask_line, *mask;
3461 int dst_stride, mask_stride;
3462 int32_t w;
3463
3464 __m128i xmm_src, xmm_def;
3465 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3466
3467 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
3468
3469 srca = src >> 24;
3470 if (src == 0)
3471 {
3472 sse2_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride,
3473 PIXMAN_FORMAT_BPP (dest_image->bits.format),
3474 dest_x, dest_y, width, height, 0);
3475 return;
3476 }
3477
3478 PIXMAN_IMAGE_GET_LINE (
3479 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3480 PIXMAN_IMAGE_GET_LINE (
3481 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
3482
3483 xmm_def = create_mask_2x32_128 (src, src);
3484 xmm_src = expand_pixel_32_1x128 (src);
3485
3486 while (height--)
3487 {
3488 dst = dst_line;
3489 dst_line += dst_stride;
3490 mask = mask_line;
3491 mask_line += mask_stride;
3492 w = width;
3493
3494 while (w && (uintptr_t)dst & 15)
3495 {
3496 uint8_t m = *mask++;
3497
3498 if (m)
3499 {
3500 *dst = pack_1x128_32 (
3501 pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)));
3502 }
3503 else
3504 {
3505 *dst = 0;
3506 }
3507
3508 w--;
3509 dst++;
3510 }
3511
3512 while (w >= 4)
3513 {
3514 uint32_t m;
3515 memcpy(&m, mask, sizeof(uint32_t));
3516
3517 if (srca == 0xff && m == 0xffffffff)
3518 {
3519 save_128_aligned ((__m128i*)dst, xmm_def);
3520 }
3521 else if (m)
3522 {
3523 xmm_mask = unpack_32_1x128 (m);
3524 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3525
3526 /* Unpacking */
3527 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3528
3529 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3530 &xmm_mask_lo, &xmm_mask_hi);
3531
3532 pix_multiply_2x128 (&xmm_src, &xmm_src,
3533 &xmm_mask_lo, &xmm_mask_hi,
3534 &xmm_mask_lo, &xmm_mask_hi);
3535
3536 save_128_aligned (
3537 (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
3538 }
3539 else
3540 {
3541 save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ());
3542 }
3543
3544 w -= 4;
3545 dst += 4;
3546 mask += 4;
3547 }
3548
3549 while (w)
3550 {
3551 uint8_t m = *mask++;
3552
3553 if (m)
3554 {
3555 *dst = pack_1x128_32 (
3556 pix_multiply_1x128 (
3557 xmm_src, expand_pixel_8_1x128 (m)));
3558 }
3559 else
3560 {
3561 *dst = 0;
3562 }
3563
3564 w--;
3565 dst++;
3566 }
3567 }
3568
3569}
3570
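Note the memcpy(&m, mask, sizeof(uint32_t)) idiom in the four-pixel loop above: it reads four a8 mask values as one 32-bit word without assuming alignment and without a strict-aliasing violation, and compilers lower it to a single unaligned load. Reduced to a sketch (hypothetical helper name):

#include <stdint.h>
#include <string.h>

static inline uint32_t
load_mask4 (const uint8_t *mask)
{
    uint32_t m;
    memcpy (&m, mask, sizeof m);  /* one unaligned 32-bit load */
    return m;                     /* m == 0 => all four pixels are clear */
}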
3571static void
3572sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
3573 pixman_composite_info_t *info)
3574{
3575 PIXMAN_COMPOSITE_ARGS (info);
3576 uint32_t src;
3577 uint16_t *dst_line, *dst, d;
3578 uint8_t *mask_line, *mask;
3579 int dst_stride, mask_stride;
3580 int32_t w;
3581 __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
3582
3583 __m128i xmm_src, xmm_alpha;
3584 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3585 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3586
3587 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
3588
3589 if (src == 0)
3590 return;
3591
3592 PIXMAN_IMAGE_GET_LINE (
3593 dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
3594 PIXMAN_IMAGE_GET_LINE (
3595 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
3596
3597 xmm_src = expand_pixel_32_1x128 (src);
3598 xmm_alpha = expand_alpha_1x128 (xmm_src);
3599 mmx_src = xmm_src;
3600 mmx_alpha = xmm_alpha;
3601
3602 while (height--)
3603 {
3604 dst = dst_line;
3605 dst_line += dst_stride;
3606 mask = mask_line;
3607 mask_line += mask_stride;
3608 w = width;
3609
3610 while (w && (uintptr_t)dst & 15)
3611 {
3612 uint8_t m = *mask++;
3613
3614 if (m)
3615 {
3616 d = *dst;
3617 mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
3618 mmx_dest = expand565_16_1x128 (d);
3619
3620 *dst = pack_565_32_16 (
3621 pack_1x128_32 (
3622 in_over_1x128 (
3623 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
3624 }
3625
3626 w--;
3627 dst++;
3628 }
3629
3630 while (w >= 8)
3631 {
3632 uint32_t m;
3633
3634 xmm_dst = load_128_aligned ((__m128i*) dst);
3635 unpack_565_128_4x128 (xmm_dst,
3636 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3637
3638 memcpy(&m, mask, sizeof(uint32_t));
3639 mask += 4;
3640
3641 if (m)
3642 {
3643 xmm_mask = unpack_32_1x128 (m);
3644 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3645
3646 /* Unpacking */
3647 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3648
3649 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3650 &xmm_mask_lo, &xmm_mask_hi);
3651
3652 in_over_2x128 (&xmm_src, &xmm_src,
3653 &xmm_alpha, &xmm_alpha,
3654 &xmm_mask_lo, &xmm_mask_hi,
3655 &xmm_dst0, &xmm_dst1);
3656 }
3657
3658 memcpy(&m, mask, sizeof(uint32_t));
3659 mask += 4;
3660
3661 if (m)
3662 {
3663 xmm_mask = unpack_32_1x128 (m);
3664 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3665
3666 /* Unpacking */
3667 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3668
3669 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3670 &xmm_mask_lo, &xmm_mask_hi);
3671 in_over_2x128 (&xmm_src, &xmm_src,
3672 &xmm_alpha, &xmm_alpha,
3673 &xmm_mask_lo, &xmm_mask_hi,
3674 &xmm_dst2, &xmm_dst3);
3675 }
3676
3677 save_128_aligned (
3678 (__m128i*)dst, pack_565_4x128_128 (
3679 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
3680
3681 w -= 8;
3682 dst += 8;
3683 }
3684
3685 while (w)
3686 {
3687 uint8_t m = *mask++;
3688
3689 if (m)
3690 {
3691 d = *dst;
3692 mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
3693 mmx_dest = expand565_16_1x128 (d);
3694
3695 *dst = pack_565_32_16 (
3696 pack_1x128_32 (
3697 in_over_1x128 (
3698 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
3699 }
3700
3701 w--;
3702 dst++;
3703 }
3704 }
3705
3706}
3707
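The r5g6b5 helpers used here (expand565_16_1x128, pack_565_32_16, unpack_565_128_4x128) move between 16-bit 565 pixels and per-channel 8-bit data. For reference, a scalar equivalent of the conversion, including the high-bit replication pixman applies when widening (hypothetical helper names, a sketch rather than pixman's code):

#include <stdint.h>

static inline uint32_t
convert_0565_to_8888 (uint16_t s)
{
    uint32_t r = (s >> 11) & 0x1f;
    uint32_t g = (s >> 5)  & 0x3f;
    uint32_t b =  s        & 0x1f;

    /* replicate the high bits into the low bits so 0x1f -> 0xff */
    r = (r << 3) | (r >> 2);
    g = (g << 2) | (g >> 4);
    b = (b << 3) | (b >> 2);

    return 0xff000000 | (r << 16) | (g << 8) | b;
}

static inline uint16_t
convert_8888_to_0565 (uint32_t p)
{
    return (uint16_t) (((p >> 8) & 0xf800) |   /* top 5 bits of red   */
                       ((p >> 5) & 0x07e0) |   /* top 6 bits of green */
                       ((p >> 3) & 0x001f));   /* top 5 bits of blue  */
}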
3708static void
3709sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
3710 pixman_composite_info_t *info)
3711{
3712 PIXMAN_COMPOSITE_ARGS (info);
3713 uint16_t *dst_line, *dst, d;
3714 uint32_t *src_line, *src, s;
3715 int dst_stride, src_stride;
3716 int32_t w;
3717 uint32_t opaque, zero;
3718
3719 __m128i ms;
3720 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
3721 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3722
3723 PIXMAN_IMAGE_GET_LINE (
3724 dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
3725 PIXMAN_IMAGE_GET_LINE (
3726 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3727
3728 while (height--)
3729 {
3730 dst = dst_line;
3731 dst_line += dst_stride;
3732 src = src_line;
3733 src_line += src_stride;
3734 w = width;
3735
3736 while (w && (uintptr_t)dst & 15)
3737 {
3738 s = *src++;
3739 d = *dst;
3740
3741 ms = unpack_32_1x128 (s);
3742
3743 *dst++ = pack_565_32_16 (
3744 pack_1x128_32 (
3745 over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
3746 w--;
3747 }
3748
3749 while (w >= 8)
3750 {
3751 /* First round */
3752 xmm_src = load_128_unaligned ((__m128i*)src);
3753 xmm_dst = load_128_aligned ((__m128i*)dst);
3754
3755 opaque = is_opaque (xmm_src);
3756 zero = is_zero (xmm_src);
3757
3758 unpack_565_128_4x128 (xmm_dst,
3759 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3760 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3761
3762 /* preload next round*/
3763 xmm_src = load_128_unaligned ((__m128i*)(src + 4));
3764
3765 if (opaque)
3766 {
3767 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
3768 &xmm_dst0, &xmm_dst1);
3769 }
3770 else if (!zero)
3771 {
3772 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
3773 &xmm_dst0, &xmm_dst1);
3774 }
3775
3776 /* Second round */
3777 opaque = is_opaque (xmm_src);
3778 zero = is_zero (xmm_src);
3779
3780 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3781
3782 if (opaque)
3783 {
3784 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
3785 &xmm_dst2, &xmm_dst3);
3786 }
3787 else if (!zero)
3788 {
3789 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
3790 &xmm_dst2, &xmm_dst3);
3791 }
3792
3793 save_128_aligned (
3794 (__m128i*)dst, pack_565_4x128_128 (
3795 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
3796
3797 w -= 8;
3798 src += 8;
3799 dst += 8;
3800 }
3801
3802 while (w)
3803 {
3804 s = *src++;
3805 d = *dst;
3806
3807 ms = unpack_32_1x128 (s);
3808
3809 *dst++ = pack_565_32_16 (
3810 pack_1x128_32 (
3811 over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
3812 w--;
3813 }
3814 }
3815
3816}
3817
3818static void
3819sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
3820 pixman_composite_info_t *info)
3821{
3822 PIXMAN_COMPOSITE_ARGS (info);
3823 uint32_t *dst_line, *dst, d;
3824 uint32_t *src_line, *src, s;
3825 int dst_stride, src_stride;
3826 int32_t w;
3827 uint32_t opaque, zero;
3828
3829 __m128i xmm_src_lo, xmm_src_hi;
3830 __m128i xmm_dst_lo, xmm_dst_hi;
3831
3832 PIXMAN_IMAGE_GET_LINE (
3833 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3834 PIXMAN_IMAGE_GET_LINE (
3835 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3836
3837 while (height--)
3838 {
3839 dst = dst_line;
3840 dst_line += dst_stride;
3841 src = src_line;
3842 src_line += src_stride;
3843 w = width;
3844
3845 while (w && (uintptr_t)dst & 15)
3846 {
3847 s = *src++;
3848 d = *dst;
3849
3850 *dst++ = pack_1x128_32 (
3851 over_rev_non_pre_1x128 (
3852 unpack_32_1x128 (s), unpack_32_1x128 (d)));
3853
3854 w--;
3855 }
3856
3857 while (w >= 4)
3858 {
3859 xmm_src_hi = load_128_unaligned ((__m128i*)src);
3860
3861 opaque = is_opaque (xmm_src_hi);
3862 zero = is_zero (xmm_src_hi);
3863
3864 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
3865
3866 if (opaque)
3867 {
3868 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
3869 &xmm_dst_lo, &xmm_dst_hi);
3870
3871 save_128_aligned (
3872 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3873 }
3874 else if (!zero)
3875 {
3876 xmm_dst_hi = load_128_aligned ((__m128i*)dst);
3877
3878 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
3879
3880 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
3881 &xmm_dst_lo, &xmm_dst_hi);
3882
3883 save_128_aligned (
3884 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3885 }
3886
3887 w -= 4;
3888 dst += 4;
3889 src += 4;
3890 }
3891
3892 while (w)
3893 {
3894 s = *src++;
3895 d = *dst;
3896
3897 *dst++ = pack_1x128_32 (
3898 over_rev_non_pre_1x128 (
3899 unpack_32_1x128 (s), unpack_32_1x128 (d)));
3900
3901 w--;
3902 }
3903 }
3904
3905}
3906
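Both pixbuf paths branch on is_opaque and is_zero, which collapse a whole 4-pixel vector into one scalar test with _mm_movemask_epi8 so the common all-opaque and all-transparent cases skip the blend entirely. A sketch of how such tests can be written (assumed, not verified against the helpers; 0x8888 selects the four alpha bytes of little-endian ARGB pixels):

#include <emmintrin.h>

static inline int
is_zero_sketch (__m128i x)
{
    return _mm_movemask_epi8 (
        _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
}

static inline int
is_opaque_sketch (__m128i x)
{
    __m128i ffs = _mm_cmpeq_epi8 (x, x);   /* all bits set */
    return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
}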
3907static void
3908sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
3909 pixman_composite_info_t *info)
3910{
3911 PIXMAN_COMPOSITE_ARGS (info);
3912 uint32_t src;
3913 uint16_t *dst_line, *dst, d;
3914 uint32_t *mask_line, *mask, m;
3915 int dst_stride, mask_stride;
3916 int w;
3917 uint32_t pack_cmp;
3918
3919 __m128i xmm_src, xmm_alpha;
3920 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3921 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3922
3923 __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
3924
3925 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
3926
3927 if (src == 0)
3928 return;
3929
3930 PIXMAN_IMAGE_GET_LINE (
3931 dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
3932 PIXMAN_IMAGE_GET_LINE (
3933 mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
3934
3935 xmm_src = expand_pixel_32_1x128 (src);
3936 xmm_alpha = expand_alpha_1x128 (xmm_src);
3937 mmx_src = xmm_src;
3938 mmx_alpha = xmm_alpha;
3939
3940 while (height--)
3941 {
3942 w = width;
3943 mask = mask_line;
3944 dst = dst_line;
3945 mask_line += mask_stride;
3946 dst_line += dst_stride;
3947
3948 while (w && ((uintptr_t)dst & 15))
3949 {
3950 m = *(uint32_t *) mask;
3951
3952 if (m)
3953 {
3954 d = *dst;
3955 mmx_mask = unpack_32_1x128 (m);
3956 mmx_dest = expand565_16_1x128 (d);
3957
3958 *dst = pack_565_32_16 (
3959 pack_1x128_32 (
3960 in_over_1x128 (
3961 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
3962 }
3963
3964 w--;
3965 dst++;
3966 mask++;
3967 }
3968
3969 while (w >= 8)
3970 {
3971 /* First round */
3972 xmm_mask = load_128_unaligned ((__m128i*)mask);
3973 xmm_dst = load_128_aligned ((__m128i*)dst);
3974
3975 pack_cmp = _mm_movemask_epi8 (
3976 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
3977
3978 unpack_565_128_4x128 (xmm_dst,
3979 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3980 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3981
3982 /* preload next round */
3983 xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
3984
3985 /* first round: skip the in_over step when all four mask words are zero */
3986 if (pack_cmp != 0xffff)
3987 {
3988 in_over_2x128 (&xmm_src, &xmm_src,
3989 &xmm_alpha, &xmm_alpha,
3990 &xmm_mask_lo, &xmm_mask_hi,
3991 &xmm_dst0, &xmm_dst1);
3992 }
3993
3994 /* Second round */
3995 pack_cmp = _mm_movemask_epi8 (
3996 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
3997
3998 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3999
4000 if (pack_cmp != 0xffff)
4001 {
4002 in_over_2x128 (&xmm_src, &xmm_src,
4003 &xmm_alpha, &xmm_alpha,
4004 &xmm_mask_lo, &xmm_mask_hi,
4005 &xmm_dst2, &xmm_dst3);
4006 }
4007
4008 save_128_aligned (
4009 (__m128i*)dst, pack_565_4x128_128 (
4010 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
4011
4012 w -= 8;
4013 dst += 8;
4014 mask += 8;
4015 }
4016
4017 while (w)
4018 {
4019 m = *(uint32_t *) mask;
4020
4021 if (m)
4022 {
4023 d = *dst;
4024 mmx_mask = unpack_32_1x128 (m);
4025 mmx_dest = expand565_16_1x128 (d);
4026
4027 *dst = pack_565_32_16 (
4028 pack_1x128_32 (
4029 in_over_1x128 (
4030 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
4031 }
4032
4033 w--;
4034 dst++;
4035 mask++;
4036 }
4037 }
4038
4039}
4040
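The pack_cmp value computed above is the same movemask trick applied to component-alpha masks: compare the four 32-bit mask words against zero and take the byte mask, so a result of 0xffff means every mask word is zero and the in_over_2x128 step can be skipped. As a standalone sketch (hypothetical helper name):

#include <emmintrin.h>

static inline int
mask4_all_zero (__m128i xmm_mask)
{
    return _mm_movemask_epi8 (
        _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())) == 0xffff;
}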
4041static void
4042sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
4043 pixman_composite_info_t *info)
4044{
4045 PIXMAN_COMPOSITE_ARGS (info);
4046 uint8_t *dst_line, *dst;
4047 uint8_t *mask_line, *mask;
4048 int dst_stride, mask_stride;
4049 uint32_t d;
4050 uint32_t src;
4051 int32_t w;
4052
4053 __m128i xmm_alpha;
4054 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4055 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4056
4057 PIXMAN_IMAGE_GET_LINE (
4058 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4059 PIXMAN_IMAGE_GET_LINE (
4060 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
4061
4062 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4063
4064 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4065
4066 while (height--)
4067 {
4068 dst = dst_line;
4069 dst_line += dst_stride;
4070 mask = mask_line;
4071 mask_line += mask_stride;
4072 w = width;
4073
4074 while (w && ((uintptr_t)dst & 15))
4075 {
4076 uint8_t m = *mask++;
4077 d = (uint32_t) *dst;
4078
4079 *dst++ = (uint8_t) pack_1x128_32 (
4080 pix_multiply_1x128 (
4081 pix_multiply_1x128 (xmm_alpha,
4082 unpack_32_1x128 (m)),
4083 unpack_32_1x128 (d)));
4084 w--;
4085 }
4086
4087 while (w >= 16)
4088 {
4089 xmm_mask = load_128_unaligned ((__m128i*)mask);
4090 xmm_dst = load_128_aligned ((__m128i*)dst);
4091
4092 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4093 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4094
4095 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4096 &xmm_mask_lo, &xmm_mask_hi,
4097 &xmm_mask_lo, &xmm_mask_hi);
4098
4099 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
4100 &xmm_dst_lo, &xmm_dst_hi,
4101 &xmm_dst_lo, &xmm_dst_hi);
4102
4103 save_128_aligned (
4104 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4105
4106 mask += 16;
4107 dst += 16;
4108 w -= 16;
4109 }
4110
4111 while (w)
4112 {
4113 uint8_t m = *mask++;
4114 d = (uint32_t) *dst;
4115
4116 *dst++ = (uint8_t) pack_1x128_32 (
4117 pix_multiply_1x128 (
4118 pix_multiply_1x128 (
4119 xmm_alpha, unpack_32_1x128 (m)),
4120 unpack_32_1x128 (d)));
4121 w--;
4122 }
4123 }
4124
4125}
4126
4127static void
4128sse2_composite_in_n_8 (pixman_implementation_t *imp,
4129 pixman_composite_info_t *info)
4130{
4131 PIXMAN_COMPOSITE_ARGS (info);
4132 uint8_t *dst_line, *dst;
4133 int dst_stride;
4134 uint32_t d;
4135 uint32_t src;
4136 int32_t w;
4137
4138 __m128i xmm_alpha;
4139 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4140
4141 PIXMAN_IMAGE_GET_LINE (
4142 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4143
4144 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4145
4146 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4147
4148 src = src >> 24;
4149
4150 if (src == 0xff)
4151 return;
4152
4153 if (src == 0x00)
4154 {
4155 pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
4156 8, dest_x, dest_y, width, height, src);
4157
4158 return;
4159 }
4160
4161 while (height--)
4162 {
4163 dst = dst_line;
4164 dst_line += dst_stride;
4165 w = width;
4166
4167 while (w && ((uintptr_t)dst & 15))
4168 {
4169 d = (uint32_t) *dst;
4170
4171 *dst++ = (uint8_t) pack_1x128_32 (
4172 pix_multiply_1x128 (
4173 xmm_alpha,
4174 unpack_32_1x128 (d)));
4175 w--;
4176 }
4177
4178 while (w >= 16)
4179 {
4180 xmm_dst = load_128_aligned ((__m128i*)dst);
4181
4182 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4183
4184 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4185 &xmm_dst_lo, &xmm_dst_hi,
4186 &xmm_dst_lo, &xmm_dst_hi);
4187
4188 save_128_aligned (
4189 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4190
4191 dst += 16;
4192 w -= 16;
4193 }
4194
4195 while (w)
4196 {
4197 d = (uint32_t) *dst;
4198
4199 *dst++ = (uint8_t) pack_1x128_32 (
4200 pix_multiply_1x128 (
4201 xmm_alpha,
4202 unpack_32_1x128 (d)));
4203 w--;
4204 }
4205 }
4206
4207}
4208
4209static void
4210sse2_composite_in_8_8 (pixman_implementation_t *imp,
4211 pixman_composite_info_t *info)
4212{
4213 PIXMAN_COMPOSITE_ARGS (info);
4214 uint8_t *dst_line, *dst;
4215 uint8_t *src_line, *src;
4216 int src_stride, dst_stride;
4217 int32_t w;
4218 uint32_t s, d;
4219
4220 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
4221 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4222
4223 PIXMAN_IMAGE_GET_LINE (
4224 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4225 PIXMAN_IMAGE_GET_LINE (
4226 src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
4227
4228 while (height--)
4229 {
4230 dst = dst_line;
4231 dst_line += dst_stride;
4232 src = src_line;
4233 src_line += src_stride;
4234 w = width;
4235
4236 while (w && ((uintptr_t)dst & 15))
4237 {
4238 s = (uint32_t) *src++;
4239 d = (uint32_t) *dst;
4240
4241 *dst++ = (uint8_t) pack_1x128_32 (
4242 pix_multiply_1x128 (
4243 unpack_32_1x128 (s), unpack_32_1x128 (d)));
4244 w--;
4245 }
4246
4247 while (w >= 16)
4248 {
4249 xmm_src = load_128_unaligned ((__m128i*)src);
4250 xmm_dst = load_128_aligned ((__m128i*)dst);
4251
4252 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
4253 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4254
4255 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
4256 &xmm_dst_lo, &xmm_dst_hi,
4257 &xmm_dst_lo, &xmm_dst_hi);
4258
4259 save_128_aligned (
4260 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4261
4262 src += 16;
4263 dst += 16;
4264 w -= 16;
4265 }
4266
4267 while (w)
4268 {
4269 s = (uint32_t) *src++;
4270 d = (uint32_t) *dst;
4271
4272 *dst++ = (uint8_t) pack_1x128_32 (
4273 pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d)));
4274 w--;
4275 }
4276 }
4277
4278}
4279
4280static void
4281sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
4282 pixman_composite_info_t *info)
4283{
4284 PIXMAN_COMPOSITE_ARGS (info);
4285 uint8_t *dst_line, *dst;
4286 uint8_t *mask_line, *mask;
4287 int dst_stride, mask_stride;
4288 int32_t w;
4289 uint32_t src;
4290 uint32_t d;
4291
4292 __m128i xmm_alpha;
4293 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4294 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4295
4296 PIXMAN_IMAGE_GET_LINE (
4297 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4298 PIXMAN_IMAGE_GET_LINE (
4299 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
4300
4301 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4302
4303 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4304
4305 while (height--)
4306 {
4307 dst = dst_line;
4308 dst_line += dst_stride;
4309 mask = mask_line;
4310 mask_line += mask_stride;
4311 w = width;
4312
4313 while (w && ((uintptr_t)dst & 15))
4314 {
4315 uint8_t m = *mask++;
4316 d = (uint32_t) *dst;
4317
4318 *dst++ = (uint8_t) pack_1x128_32 (
4319 _mm_adds_epu16 (
4320 pix_multiply_1x128 (
4321 xmm_alpha, unpack_32_1x128 (m)),
4322 unpack_32_1x128 (d)));
4323 w--;
4324 }
4325
4326 while (w >= 16)
4327 {
4328 xmm_mask = load_128_unaligned ((__m128i*)mask);
4329 xmm_dst = load_128_aligned ((__m128i*)dst);
4330
4331 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4332 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4333
4334 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4335 &xmm_mask_lo, &xmm_mask_hi,
4336 &xmm_mask_lo, &xmm_mask_hi);
4337
4338 xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
4339 xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
4340
4341 save_128_aligned (
4342 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4343
4344 mask += 16;
4345 dst += 16;
4346 w -= 16;
4347 }
4348
4349 while (w)
4350 {
4351 uint8_t m = *mask++;
4352 d = (uint32_t) *dst;
4353
4354 *dst++ = (uint8_t) pack_1x128_32 (
4355 _mm_adds_epu16 (
4356 pix_multiply_1x128 (
4357 xmm_alpha, unpack_32_1x128 (m)),
4358 unpack_32_1x128 (d)));
4359
4360 w--;
4361 }
4362 }
4363
4364}
4365
4366static void
4367sse2_composite_add_n_8 (pixman_implementation_t *imp,
4368 pixman_composite_info_t *info)
4369{
4370 PIXMAN_COMPOSITE_ARGS (info);
4371 uint8_t *dst_line, *dst;
4372 int dst_stride;
4373 int32_t w;
4374 uint32_t src;
4375
4376 __m128i xmm_src;
4377
4378 PIXMAN_IMAGE_GET_LINE (
4379 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4380
4381 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4382
4383 src >>= 24;
4384
4385 if (src == 0x00)
4386 return;
4387
4388 if (src == 0xff)
4389 {
4390 pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
4391 8, dest_x, dest_y, width, height, 0xff);
4392
4393 return;
4394 }
4395
4396 src = (src << 24) | (src << 16) | (src << 8) | src;
4397 xmm_src = _mm_set_epi32 (src, src, src, src);
4398
4399 while (height--)
4400 {
4401 dst = dst_line;
4402 dst_line += dst_stride;
4403 w = width;
4404
4405 while (w && ((uintptr_t)dst & 15))
4406 {
4407 *dst = (uint8_t)_mm_cvtsi128_si32 (
4408 _mm_adds_epu8 (
4409 xmm_src,
4410 _mm_cvtsi32_si128 (*dst)));
4411
4412 w--;
4413 dst++;
4414 }
4415
4416 while (w >= 16)
4417 {
4418 save_128_aligned (
4419 (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
4420
4421 dst += 16;
4422 w -= 16;
4423 }
4424
4425 while (w)
4426 {
4427 *dst = (uint8_t)_mm_cvtsi128_si32 (
4428 _mm_adds_epu8 (
4429 xmm_src,
4430 _mm_cvtsi32_si128 (*dst)));
4431
4432 w--;
4433 dst++;
4434 }
4435 }
4436
4437}
4438
4439static void
4440sse2_composite_add_8_8 (pixman_implementation_t *imp,
4441 pixman_composite_info_t *info)
4442{
4443 PIXMAN_COMPOSITE_ARGS (info);
4444 uint8_t *dst_line, *dst;
4445 uint8_t *src_line, *src;
4446 int dst_stride, src_stride;
4447 int32_t w;
4448 uint16_t t;
4449
4450 PIXMAN_IMAGE_GET_LINE (
4451 src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
4452 PIXMAN_IMAGE_GET_LINE (
4453 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4454
4455 while (height--)
4456 {
4457 dst = dst_line;
4458 src = src_line;
4459
4460 dst_line += dst_stride;
4461 src_line += src_stride;
4462 w = width;
4463
4464 /* Small head */
4465 while (w && (uintptr_t)dst & 3)
4466 {
4467 t = (*dst) + (*src++);
4468 *dst++ = t | (0 - (t >> 8));
4469 w--;
4470 }
4471
4472 sse2_combine_add_u (imp, op,
4473 (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
4474
4475 /* Small tail */
4476 dst += w & 0xfffc;
4477 src += w & 0xfffc;
4478
4479 w &= 3;
4480
4481 while (w)
4482 {
4483 t = (*dst) + (*src++);
4484 *dst++ = t | (0 - (t >> 8));
4485 w--;
4486 }
4487 }
4488
4489}
4490
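The scalar head and tail loops above use a branch-free 8-bit saturating add: t holds the 9-bit sum, t >> 8 is 1 exactly on overflow, and 0 - (t >> 8) then becomes all-ones so the OR clamps the result to 0xff. Isolated for clarity (hypothetical helper name):

#include <stdint.h>

static inline uint8_t
add_sat_u8 (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t) (a + b);
    return (uint8_t) (t | (0 - (t >> 8)));  /* 0xff on overflow */
}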
4491static void
4492sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
4493 pixman_composite_info_t *info)
4494{
4495 PIXMAN_COMPOSITE_ARGS (info);
4496 uint32_t *dst_line, *dst;
4497 uint32_t *src_line, *src;
4498 int dst_stride, src_stride;
4499
4500 PIXMAN_IMAGE_GET_LINE (
4501 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
4502 PIXMAN_IMAGE_GET_LINE (
4503 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
4504
4505 while (height--)
4506 {
4507 dst = dst_line;
4508 dst_line += dst_stride;
4509 src = src_line;
4510 src_line += src_stride;
4511
4512 sse2_combine_add_u (imp, op, dst, src, NULL, width);
4513 }
4514}
4515
4516static void
4517sse2_composite_add_n_8888 (pixman_implementation_t *imp,
4518 pixman_composite_info_t *info)
4519{
4520 PIXMAN_COMPOSITE_ARGS (info);
4521 uint32_t *dst_line, *dst, src;
4522 int dst_stride;
4523
4524 __m128i xmm_src;
4525
4526 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
4527
4528 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4529 if (src == 0)
4530 return;
4531
4532 if (src == ~0)
4533 {
4534 pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32,
4535 dest_x, dest_y, width, height, ~0);
4536
4537 return;
4538 }
4539
4540 xmm_src = _mm_set_epi32 (src, src, src, src);
4541 while (height--)
4542 {
4543 int w = width;
4544 uint32_t d;
4545
4546 dst = dst_line;
4547 dst_line += dst_stride;
4548
4549 while (w && (uintptr_t)dst & 15)
4550 {
4551 d = *dst;
4552 *dst++ =
4553 _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d)));
4554 w--;
4555 }
4556
4557 while (w >= 4)
4558 {
4559 save_128_aligned
4560 ((__m128i*)dst,
4561 _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
4562
4563 dst += 4;
4564 w -= 4;
4565 }
4566
4567 while (w--)
4568 {
4569 d = *dst;
4570 *dst++ =
4571 _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src,
4572 _mm_cvtsi32_si128 (d)));
4573 }
4574 }
4575}
4576
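The vector body of the ADD operator is just _mm_adds_epu8: the solid source is broadcast into all four 32-bit lanes and added to the destination with unsigned byte saturation, which is exactly the per-channel ADD compositing rule. A self-contained sketch of one 4-pixel step (unaligned access for simplicity; the code above keeps the destination aligned):

#include <emmintrin.h>
#include <stdint.h>

static inline void
add_4_pixels (uint32_t *dst, uint32_t src)
{
    __m128i s = _mm_set1_epi32 ((int) src);
    __m128i d = _mm_loadu_si128 ((const __m128i *) dst);
    _mm_storeu_si128 ((__m128i *) dst, _mm_adds_epu8 (s, d));
}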
4577static void
4578sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
4579 pixman_composite_info_t *info)
4580{
4581 PIXMAN_COMPOSITE_ARGS (info);
4582 uint32_t *dst_line, *dst;
4583 uint8_t *mask_line, *mask;
4584 int dst_stride, mask_stride;
4585 int32_t w;
4586 uint32_t src;
4587
4588 __m128i xmm_src;
4589
4590 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4591 if (src == 0)
4592 return;
4593 xmm_src = expand_pixel_32_1x128 (src);
4594
4595 PIXMAN_IMAGE_GET_LINE (
4596 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
4597 PIXMAN_IMAGE_GET_LINE (
4598 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
4599
4600 while (height--)
4601 {
4602 dst = dst_line;
4603 dst_line += dst_stride;
4604 mask = mask_line;
4605 mask_line += mask_stride;
4606 w = width;
4607
4608 while (w && ((uintptr_t)dst & 15))
4609 {
4610 uint8_t m = *mask++;
4611 if (m)
4612 {
4613 *dst = pack_1x128_32
4614 (_mm_adds_epu16
4615 (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
4616 unpack_32_1x128 (*dst)));
4617 }
4618 dst++;
4619 w--;
4620 }
4621
4622 while (w >= 4)
4623 {
4624 uint32_t m;
4625 memcpy(&m, mask, sizeof(uint32_t));
4626
4627 if (m)
4628 {
4629 __m128i xmm_mask_lo, xmm_mask_hi;
4630 __m128i xmm_dst_lo, xmm_dst_hi;
4631
4632 __m128i xmm_dst = load_128_aligned ((__m128i*)dst);
4633 __m128i xmm_mask =
4634 _mm_unpacklo_epi8 (unpack_32_1x128(m),
4635 _mm_setzero_si128 ());
4636
4637 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4638 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4639
4640 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
4641 &xmm_mask_lo, &xmm_mask_hi);
4642
4643 pix_multiply_2x128 (&xmm_src, &xmm_src,
4644 &xmm_mask_lo, &xmm_mask_hi,
4645 &xmm_mask_lo, &xmm_mask_hi);
4646
4647 xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
4648 xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
4649
4650 save_128_aligned (
4651 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4652 }
4653
4654 w -= 4;
4655 dst += 4;
4656 mask += 4;
4657 }
4658
4659 while (w)
4660 {
4661 uint8_t m = *mask++;
4662 if (m)
4663 {
4664 *dst = pack_1x128_32
4665 (_mm_adds_epu16
4666 (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
4667 unpack_32_1x128 (*dst)));
4668 }
4669 dst++;
4670 w--;
4671 }
4672 }
4673}
4674
4675static pixman_bool_t
4676sse2_blt (pixman_implementation_t *imp,
4677 uint32_t * src_bits,
4678 uint32_t * dst_bits,
4679 int src_stride,
4680 int dst_stride,
4681 int src_bpp,
4682 int dst_bpp,
4683 int src_x,
4684 int src_y,
4685 int dest_x,
4686 int dest_y,
4687 int width,
4688 int height)
4689{
4690 uint8_t * src_bytes;
4691 uint8_t * dst_bytes;
4692 int byte_width;
4693
4694 if (src_bpp != dst_bpp)
4695 return FALSE;
4696
4697 if (src_bpp == 16)
4698 {
4699 src_stride = src_stride * (int) sizeof (uint32_t) / 2;
4700 dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
4701 src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
4702 dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
4703 byte_width = 2 * width;
4704 src_stride *= 2;
4705 dst_stride *= 2;
4706 }
4707 else if (src_bpp == 32)
4708 {
4709 src_stride = src_stride * (int) sizeof (uint32_t) / 4;
4710 dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
4711 src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
4712 dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
4713 byte_width = 4 * width;
4714 src_stride *= 4;
4715 dst_stride *= 4;
4716 }
4717 else
4718 {
4719 return FALSE;
4720 }
4721
4722 while (height--)
4723 {
4724 int w;
4725 uint8_t *s = src_bytes;
4726 uint8_t *d = dst_bytes;
4727 src_bytes += src_stride;
4728 dst_bytes += dst_stride;
4729 w = byte_width;
4730
4731 while (w >= 2 && ((uintptr_t)d & 3))
4732 {
4733 memmove(d, s, 2);
4734 w -= 2;
4735 s += 2;
4736 d += 2;
4737 }
4738
4739 while (w >= 4 && ((uintptr_t)d & 15))
4740 {
4741 memmove(d, s, 4);
4742
4743 w -= 4;
4744 s += 4;
4745 d += 4;
4746 }
4747
4748 while (w >= 64)
4749 {
4750 __m128i xmm0, xmm1, xmm2, xmm3;
4751
4752 xmm0 = load_128_unaligned ((__m128i*)(s));
4753 xmm1 = load_128_unaligned ((__m128i*)(s + 16));
4754 xmm2 = load_128_unaligned ((__m128i*)(s + 32));
4755 xmm3 = load_128_unaligned ((__m128i*)(s + 48));
4756
4757 save_128_aligned ((__m128i*)(d), xmm0);
4758 save_128_aligned ((__m128i*)(d + 16), xmm1);
4759 save_128_aligned ((__m128i*)(d + 32), xmm2);
4760 save_128_aligned ((__m128i*)(d + 48), xmm3);
4761
4762 s += 64;
4763 d += 64;
4764 w -= 64;
4765 }
4766
4767 while (w >= 16)
4768 {
4769 save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
4770
4771 w -= 16;
4772 d += 16;
4773 s += 16;
4774 }
4775
4776 while (w >= 4)
4777 {
4778 memmove(d, s, 4);
4779
4780 w -= 4;
4781 s += 4;
4782 d += 4;
4783 }
4784
4785 if (w >= 2)
4786 {
4787 memmove(d, s, 2);
4788 w -= 2;
4789 s += 2;
4790 d += 2;
4791 }
4792 }
4793
4794 return TRUE;
4795}
4796
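sse2_blt applies the same alignment discipline to copies: small memmove chunks until the destination is 16-byte aligned, then unaligned loads paired with aligned stores for the bulk, then memmove for the remainder. One row of that strategy as a sketch (hypothetical helper; the code above additionally unrolls the body four vectors at a time):

#include <emmintrin.h>
#include <stdint.h>
#include <string.h>

static void
copy_row_sketch (uint8_t *d, const uint8_t *s, int w)
{
    while (w >= 4 && ((uintptr_t) d & 15))
    {
        memmove (d, s, 4);
        d += 4; s += 4; w -= 4;
    }
    while (w >= 16)
    {
        _mm_store_si128 ((__m128i *) d,
                         _mm_loadu_si128 ((const __m128i *) s));
        d += 16; s += 16; w -= 16;
    }
    while (w > 0)
    {
        int n = w < 4 ? w : 4;
        memmove (d, s, (size_t) n);
        d += n; s += n; w -= n;
    }
}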
4797static void
4798sse2_composite_copy_area (pixman_implementation_t *imp,
4799 pixman_composite_info_t *info)
4800{
4801 PIXMAN_COMPOSITE_ARGS (info);
4802 sse2_blt (imp, src_image->bits.bits,
4803 dest_image->bits.bits,
4804 src_image->bits.rowstride,
4805 dest_image->bits.rowstride,
4806 PIXMAN_FORMAT_BPP (src_image->bits.format),
4807 PIXMAN_FORMAT_BPP (dest_image->bits.format),
4808 src_x, src_y, dest_x, dest_y, width, height);
4809}
4810
4811static void
4812sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
4813 pixman_composite_info_t *info)
4814{
4815 PIXMAN_COMPOSITE_ARGS (info);
4816 uint32_t *src, *src_line, s;
4817 uint32_t *dst, *dst_line, d;
4818 uint8_t *mask, *mask_line;
4819 int src_stride, mask_stride, dst_stride;
4820 int32_t w;
4821 __m128i ms;
4822
4823 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
4824 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4825 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4826
4827 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
4828 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
4829 PIXMAN_IMAGE_GET_LINE (
4830 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 /* expands like the dest_image case above, but in uint8_t units:
    mask_stride = mask_image->bits.rowstride * (int) sizeof (uint32_t) / (int) sizeof (uint8_t);
    mask_line = ((uint8_t *) mask_image->bits.bits) + mask_stride * mask_y + 1 * mask_x */
4831 PIXMAN_IMAGE_GET_LINE (
4832 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 /* expands like the dest_image case above, computing src_stride and
    src_line from src_image->bits at (src_x, src_y) */
4833
4834 while (height--)
4835 {
4836 src = src_line;
4837 src_line += src_stride;
4838 dst = dst_line;
4839 dst_line += dst_stride;
4840 mask = mask_line;
4841 mask_line += mask_stride;
4842
4843 w = width;
4844
4845 while (w && (uintptr_t)dst & 15)
4846 {
4847 uint8_t m = *mask++;
4848 s = 0xff000000 | *src++;
4849 d = *dst;
4850 ms = unpack_32_1x128 (s);
4851
4852 if (m != 0xff)
4853 {
4854 __m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
4855 __m128i md = unpack_32_1x128 (d);
4856
4857 ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md);
4858 }
4859
4860 *dst++ = pack_1x128_32 (ms);
4861 w--;
4862 }
4863
4864 while (w >= 4)
4865 {
4866 uint32_t m;
4867 memcpy(&m, mask, sizeof(uint32_t));
4868 xmm_src = _mm_or_si128 (
4869 load_128_unaligned ((__m128i*)src), mask_ff000000);
4870
4871 if (m == 0xffffffff)
4872 {
4873 save_128_aligned ((__m128i*)dst, xmm_src);
4874 }
4875 else
4876 {
4877 xmm_dst = load_128_aligned ((__m128i*)dst);
4878
4879 xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
4880
4881 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
4882 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4883 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4884
4885 expand_alpha_rev_2x128 (
4886 xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
4887
4888 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
4889 &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi,
4890 &xmm_dst_lo, &xmm_dst_hi);
4891
4892 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4893 }
4894
4895 src += 4;
4896 dst += 4;
4897 mask += 4;
4898 w -= 4;
4899 }
4900
4901 while (w)
4902 {
4903 uint8_t m = *mask++;
4904
4905 if (m)
4906 {
4907 s = 0xff000000 | *src;
4908
4909 if (m == 0xff)
4910 {
4911 *dst = s;
4912 }
4913 else
4914 {
4915 __m128i ma, md, ms;
4916
4917 d = *dst;
4918
4919 ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
4920 md = unpack_32_1x128 (d);
4921 ms = unpack_32_1x128 (s);
4922
4923 *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md));
4924 }
4925
4926 }
4927
4928 src++;
4929 dst++;
4930 w--;
4931 }
4932 }
4933
4934}
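The PIXMAN_IMAGE_GET_LINE expansions above all reduce to the same pointer
arithmetic: rescale the rowstride (stored in uint32_t units) to the pixel
type, then index to (x, y). A minimal sketch of that computation, with
hypothetical names (image_get_line_sketch is illustrative only):

#include <stdint.h>
#include <stddef.h>

static void *
image_get_line_sketch (uint32_t *bits, int rowstride_u32,
                       size_t bpp_bytes, int x, int y, int *stride_out)
{
    /* stride in pixels of the requested type, as in the expansion:
       __stride__ * sizeof (uint32_t) / sizeof (pixel_type) */
    int stride = rowstride_u32 * (int) sizeof (uint32_t) / (int) bpp_bytes;
    *stride_out = stride;
    /* line = ((pixel_type *) bits) + stride * y + x, done in bytes here */
    return (uint8_t *) bits + (size_t) stride * bpp_bytes * y + bpp_bytes * x;
}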
4935
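Per pixel, both the scalar edges and the SSE2 body of
sse2_composite_over_x888_8_8888 above compute a masked OVER: the source is
forced opaque (the 0xff000000 OR), scaled by the 8-bit mask, and composited
over the destination. A scalar sketch of that math, assuming the usual
pixman-style rounded div-255 multiply (names here are illustrative):

#include <stdint.h>

/* Rounded (a * b) / 255. */
static inline uint8_t
mul_un8_sketch (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t) ((uint16_t) a * b + 0x80);
    return (uint8_t) ((t + (t >> 8)) >> 8);
}

/* One channel of in_over: (src IN mask) OVER dest, with source alpha sa. */
static inline uint8_t
in_over_channel_sketch (uint8_t s, uint8_t sa, uint8_t m, uint8_t d)
{
    uint8_t src_in_mask   = mul_un8_sketch (s, m);
    uint8_t alpha_in_mask = mul_un8_sketch (sa, m);
    return (uint8_t) (src_in_mask + mul_un8_sketch (d, 255 - alpha_in_mask));
}

With an x888 source sa is always 0xff, which is why the m == 0xff fast path
above can store s directly.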
4936static void
4937sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
4938 pixman_composite_info_t *info)
4939{
4940 PIXMAN_COMPOSITE_ARGS (info);
 /* expands to a block of __attribute__((unused)) locals unpacked from info:
    pixman_op_t op; pixman_image_t *src_image, *mask_image, *dest_image;
    int32_t src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height */
4941 uint32_t *src, *src_line, s;
4942 uint32_t *dst, *dst_line, d;
4943 uint8_t *mask, *mask_line;
4944 int src_stride, mask_stride, dst_stride;
4945 int32_t w;
4946
4947 __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
4948 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4949 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4950
4951 PIXMAN_IMAGE_GET_LINE (
4952 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 /* expands as in sse2_composite_over_x888_8_8888 above: dst_stride and
    dst_line computed from dest_image->bits at (dest_x, dest_y) */
4953 PIXMAN_IMAGE_GET_LINE (
4954 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 /* expands likewise for mask_image, in uint8_t units */
4955 PIXMAN_IMAGE_GET_LINE (
4956 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 /* expands likewise for src_image */
4957
4958 while (height--)
4959 {
4960 src = src_line;
4961 src_line += src_stride;
4962 dst = dst_line;
4963 dst_line += dst_stride;
4964 mask = mask_line;
4965 mask_line += mask_stride;
4966
4967 w = width;
4968
4969 while (w && (uintptr_t)dst & 15)
4970 {
4971 uint32_t sa;
4972 uint8_t m = *mask++;
4973
4974 s = *src++;
4975 d = *dst;
4976
4977 sa = s >> 24;
4978
4979 if (m)
4980 {
4981 if (sa == 0xff && m == 0xff)
4982 {
4983 *dst = s;
4984 }
4985 else
4986 {
4987 __m128i ms, md, ma, msa;
4988
4989 ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
4990 ms = unpack_32_1x128 (s);
4991 md = unpack_32_1x128 (d);
4992
4993 msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
4994
4995 *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
4996 }
4997 }
4998
4999 dst++;
5000 w--;
5001 }
5002
5003 while (w >= 4)
5004 {
5005 uint32_t m;
5006 memcpy(&m, mask, sizeof(uint32_t));
5007
5008 if (m)
5009 {
5010 xmm_src = load_128_unaligned ((__m128i*)src);
5011
5012 if (m == 0xffffffff && is_opaque (xmm_src))
5013 {
5014 save_128_aligned ((__m128i *)dst, xmm_src);
5015 }
5016 else
5017 {
5018 xmm_dst = load_128_aligned ((__m128i *)dst);
5019
5020 xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
5021
5022 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5023 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
5024 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5025
5026 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
5027 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
5028
5029 in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
5030 &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
5031
5032 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5033 }
5034 }
5035
5036 src += 4;
5037 dst += 4;
5038 mask += 4;
5039 w -= 4;
5040 }
5041
5042 while (w)
5043 {
5044 uint32_t sa;
5045 uint8_t m = *mask++;
5046
5047 s = *src++;
5048 d = *dst;
5049
5050 sa = s >> 24;
5051
5052 if (m)
5053 {
5054 if (sa == 0xff && m == 0xff)
5055 {
5056 *dst = s;
5057 }
5058 else
5059 {
5060 __m128i ms, md, ma, msa;
5061
5062 ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
5063 ms = unpack_32_1x128 (s);
5064 md = unpack_32_1x128 (d);
5065
5066 msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
5067
5068 *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
5069 }
5070 }
5071
5072 dst++;
5073 w--;
5074 }
5075 }
5076
5077}
5078
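The while (w && (uintptr_t)dst & 15) head loops above are the standard SIMD
prologue: handle single pixels until the destination reaches a 16-byte
boundary, so the 4-pixel body may use aligned stores while source and mask
loads stay unaligned (note the memcpy used above to read 4 mask bytes
safely). A minimal sketch of the pattern on a simple fill:

#include <stdint.h>
#include <emmintrin.h>

static void
fill32_sketch (uint32_t *dst, int n, uint32_t value)
{
    while (n && ((uintptr_t) dst & 15))      /* prologue: align dst */
    {
        *dst++ = value;
        n--;
    }
    __m128i v = _mm_set1_epi32 ((int) value);
    while (n >= 4)                           /* aligned 16-byte body */
    {
        _mm_store_si128 ((__m128i *) dst, v);
        dst += 4;
        n -= 4;
    }
    while (n)                                /* scalar tail */
    {
        *dst++ = value;
        n--;
    }
}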
5079static void
5080sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
5081 pixman_composite_info_t *info)
5082{
5083 PIXMAN_COMPOSITE_ARGS (info);
 /* expands to a block of __attribute__((unused)) locals unpacked from info:
    pixman_op_t op; pixman_image_t *src_image, *mask_image, *dest_image;
    int32_t src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height */
5084 uint32_t src;
5085 uint32_t *dst_line, *dst;
5086 __m128i xmm_src;
5087 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5088 __m128i xmm_dsta_hi, xmm_dsta_lo;
5089 int dst_stride;
5090 int32_t w;
5091
5092 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
5093
5094 if (src == 0)
5095 return;
5096
5097 PIXMAN_IMAGE_GET_LINE (
5098 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 /* expands as above: dst_stride and dst_line from dest_image->bits */
5099
5100 xmm_src = expand_pixel_32_1x128 (src);
5101
5102 while (height--)
5103 {
5104 dst = dst_line;
5105
5106 dst_line += dst_stride;
5107 w = width;
5108
5109 while (w && (uintptr_t)dst & 15)
5110 {
5111 __m128i vd;
5112
5113 vd = unpack_32_1x128 (*dst);
5114
5115 *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
5116 xmm_src));
5117 w--;
5118 dst++;
5119 }
5120
5121 while (w >= 4)
5122 {
5123 __m128i tmp_lo, tmp_hi;
5124
5125 xmm_dst = load_128_aligned ((__m128i*)dst);
5126
5127 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5128 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi);
5129
5130 tmp_lo = xmm_src;
5131 tmp_hi = xmm_src;
5132
5133 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
5134 &xmm_dsta_lo, &xmm_dsta_hi,
5135 &tmp_lo, &tmp_hi);
5136
5137 save_128_aligned (
5138 (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi));
5139
5140 w -= 4;
5141 dst += 4;
5142 }
5143
5144 while (w)
5145 {
5146 __m128i vd;
5147
5148 vd = unpack_32_1x128 (*dst);
5149
5150 *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
5151 xmm_src));
5152 w--;
5153 dst++;
5154 }
5155
5156 }
5157
5158}
5159
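sse2_composite_over_reverse_n_8888 applies OVER with the operands swapped:
the existing destination is composited over the expanded solid source, as
the over_1x128 (vd, expand_alpha_1x128 (vd), xmm_src) calls above suggest.
Per premultiplied channel that is d + (1 - d.alpha) * s, which also explains
the early return: a zero source leaves every destination pixel unchanged.
A one-line scalar sketch, reusing mul_un8_sketch from earlier:

/* One channel of OVER-reverse: destination over solid source. */
static inline uint8_t
over_reverse_channel_sketch (uint8_t d, uint8_t da, uint8_t s)
{
    return (uint8_t) (d + mul_un8_sketch (s, 255 - da));
}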
5160static void
5161sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
5162 pixman_composite_info_t *info)
5163{
5164 PIXMAN_COMPOSITE_ARGS (info);
 /* expands to a block of __attribute__((unused)) locals unpacked from info:
    pixman_op_t op; pixman_image_t *src_image, *mask_image, *dest_image;
    int32_t src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height */
5165 uint32_t *src, *src_line, s;
5166 uint32_t *dst, *dst_line, d;
5167 uint32_t *mask, *mask_line;
5168 uint32_t m;
5169 int src_stride, mask_stride, dst_stride;
5170 int32_t w;
5171
5172 __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
5173 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5174 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
5175
5176 PIXMAN_IMAGE_GET_LINE (
5177 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 /* expands as above for dest_image */
5178 PIXMAN_IMAGE_GET_LINE (
5179 mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 /* expands as above for mask_image, here in uint32_t units (a8r8g8b8 mask) */
5180 PIXMAN_IMAGE_GET_LINE (
5181 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 /* expands as above for src_image */
5182
5183 while (height--)
5184 {
5185 src = src_line;
5186 src_line += src_stride;
5187 dst = dst_line;
5188 dst_line += dst_stride;
5189 mask = mask_line;
5190 mask_line += mask_stride;
5191
5192 w = width;
5193
5194 while (w && (uintptr_t)dst & 15)
5195 {
5196 uint32_t sa;
5197
5198 s = *src++;
5199 m = (*mask++) >> 24;
5200 d = *dst;
5201
5202 sa = s >> 24;
5203
5204 if (m)
5205 {
5206 if (sa == 0xff && m == 0xff)
5207 {
5208 *dst = s;
5209 }
5210 else
5211 {
5212 __m128i ms, md, ma, msa;
5213
5214 ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
5215 ms = unpack_32_1x128 (s);
5216 md = unpack_32_1x128 (d);
5217
5218 msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
5219
5220 *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
5221 }
5222 }
5223
5224 dst++;
5225 w--;
5226 }
5227
5228 while (w >= 4)
5229 {
5230 xmm_mask = load_128_unaligned ((__m128i*)mask);
5231
5232 if (!is_transparent (xmm_mask))
5233 {
5234 xmm_src = load_128_unaligned ((__m128i*)src);
5235
5236 if (is_opaque (xmm_mask) && is_opaque (xmm_src))
5237 {
5238 save_128_aligned ((__m128i *)dst, xmm_src);
5239 }
5240 else
5241 {
5242 xmm_dst = load_128_aligned ((__m128i *)dst);
5243
5244 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5245 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
5246 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5247
5248 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
5249 expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
5250
5251 in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
5252 &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
5253
5254 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5255 }
5256 }
5257
5258 src += 4;
5259 dst += 4;
5260 mask += 4;
5261 w -= 4;
5262 }
5263
5264 while (w)
5265 {
5266 uint32_t sa;
5267
5268 s = *src++;
5269 m = (*mask++) >> 24;
5270 d = *dst;
5271
5272 sa = s >> 24;
5273
5274 if (m)
5275 {
5276 if (sa == 0xff && m == 0xff)
5277 {
5278 *dst = s;
5279 }
5280 else
5281 {
5282 __m128i ms, md, ma, msa;
5283
5284 ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
5285 ms = unpack_32_1x128 (s);
5286 md = unpack_32_1x128 (d);
5287
5288 msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
5289
5290 *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
5291 }
5292 }
5293
5294 dst++;
5295 w--;
5296 }
5297 }
5298
5299}
5300
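The 4-pixel paths above gate on is_transparent and is_opaque so that fully
opaque spans are stored directly and fully transparent mask spans are
skipped. A plausible SSE2 opacity test (an assumption; pixman's actual
helper may be written differently): for packed a8r8g8b8 pixels the alpha
bytes sit at offsets 3, 7, 11 and 15, so compare every byte against 0xff
and inspect only those movemask bits.

#include <emmintrin.h>

/* Nonzero iff every alpha byte of four packed a8r8g8b8 pixels is 0xff. */
static inline int
is_opaque_sketch (__m128i x)
{
    __m128i ones = _mm_cmpeq_epi8 (x, x);                  /* 0xff everywhere */
    int m = _mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ones));  /* 1 bit per 0xff byte */
    return (m & 0x8888) == 0x8888;                         /* alpha lanes only */
}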
5301/* A variant of 'sse2_combine_over_u' with minor tweaks */
5302static force_inline void /* force_inline expands to __inline__ __attribute__ ((__always_inline__)) */
5303scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
5304 const uint32_t* ps,
5305 int32_t w,
5306 pixman_fixed_t vx,
5307 pixman_fixed_t unit_x,
5308 pixman_fixed_t src_width_fixed,
5309 pixman_bool_t fully_transparent_src)
5310{
5311 uint32_t s, d;
5312 const uint32_t* pm = NULL; /* NULL expands to ((void*)0) */
5313
5314 __m128i xmm_dst_lo, xmm_dst_hi;
5315 __m128i xmm_src_lo, xmm_src_hi;
5316 __m128i xmm_alpha_lo, xmm_alpha_hi;
5317
5318 if (fully_transparent_src)
5319 return;
5320
5321 /* Align dst on a 16-byte boundary */
5322 while (w && ((uintptr_t)pd & 15))
5323 {
5324 d = *pd;
5325 s = combine1 (ps + pixman_fixed_to_int (vx), pm); /* pixman_fixed_to_int (vx) = (int) (vx >> 16) */
5326 vx += unit_x;
5327 while (vx >= 0)
5328 vx -= src_width_fixed;
5329
5330 *pd++ = core_combine_over_u_pixel_sse2 (s, d);
5331 if (pm)
5332 pm++;
5333 w--;
5334 }
5335
5336 while (w >= 4)
5337 {
5338 __m128i tmp;
5339 uint32_t tmp1, tmp2, tmp3, tmp4;
5340
5341 tmp1 = *(ps + pixman_fixed_to_int (vx));
5342 vx += unit_x;
5343 while (vx >= 0)
5344 vx -= src_width_fixed;
5345 tmp2 = *(ps + pixman_fixed_to_int (vx));
5346 vx += unit_x;
5347 while (vx >= 0)
5348 vx -= src_width_fixed;
5349 tmp3 = *(ps + pixman_fixed_to_int (vx));
5350 vx += unit_x;
5351 while (vx >= 0)
5352 vx -= src_width_fixed;
5353 tmp4 = *(ps + pixman_fixed_to_int (vx));
5354 vx += unit_x;
5355 while (vx >= 0)
5356 vx -= src_width_fixed;
5357
5358 tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
5359
5360 xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
5361
5362 if (is_opaque (xmm_src_hi))
5363 {
5364 save_128_aligned ((__m128i*)pd, xmm_src_hi);
5365 }
5366 else if (!is_zero (xmm_src_hi))
5367 {
5368 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
5369
5370 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
5371 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
5372
5373 expand_alpha_2x128 (
5374 xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
5375
5376 over_2x128 (&xmm_src_lo, &xmm_src_hi,
5377 &xmm_alpha_lo, &xmm_alpha_hi,
5378 &xmm_dst_lo, &xmm_dst_hi);
5379
5380 /* rebuild the 4 pixel data and save */
5381 save_128_aligned ((__m128i*)pd,
5382 pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5383 }
5384
5385 w -= 4;
5386 pd += 4;
5387 if (pm)
5388 pm += 4;
5389 }
5390
5391 while (w)
5392 {
5393 d = *pd;
5394 s = combine1 (ps + pixman_fixed_to_int (vx), pm);
5395 vx += unit_x;
5396 while (vx >= 0)
5397 vx -= src_width_fixed;
5398
5399 *pd++ = core_combine_over_u_pixel_sse2 (s, d);
5400 if (pm)
5401 pm++;
5402
5403 w--;
5404 }
5405}
5406
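In the scanline function above, vx walks the source row in 16.16 fixed
point: pixman_fixed_to_int (vx) is just vx >> 16, and the
while (vx >= 0) vx -= src_width_fixed loop keeps vx negative, because the
generated callers below pass ps pointing one full row width past the row
start. A tiny sketch of that stepping, with hypothetical names:

#include <stdint.h>

typedef int32_t fixed_16_16;            /* pixman_fixed_t-style 16.16 */
#define FIXED_TO_INT(f) ((int) ((f) >> 16))

/* 'ps' points one row width past the row start; 'vx' starts in
   [-src_width_fixed, 0), matching the FAST_NEAREST_MAINLOOP callers. */
static void
nearest_walk_sketch (uint32_t *out, const uint32_t *ps, int w,
                     fixed_16_16 vx, fixed_16_16 unit_x,
                     fixed_16_16 src_width_fixed)
{
    while (w--)
    {
        *out++ = ps[FIXED_TO_INT (vx)]; /* negative index into the row */
        vx += unit_x;
        while (vx >= 0)                 /* wrap, as in the loops above */
            vx -= src_width_fixed;
    }
}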
5407FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
5408 scaled_nearest_scanline_sse2_8888_8888_OVER,
5409 uint32_t, uint32_t, COVER)
 /* The expansion defines two functions:
    - scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper,
      a force-inline shim that drops the unused mask argument and forwards to
      scaled_nearest_scanline_sse2_8888_8888_OVER; and
    - fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER, the main loop,
      which unpacks PIXMAN_COMPOSITE_ARGS, maps the first destination pixel
      through src_image->common.transform to get vx/vy and unit_x/unit_y in
      16.16 fixed point, then calls the wrapper once per scanline, passing
      src + src_image->bits.width and vx - src_width_fixed so the scanline
      code indexes backwards from the end of the row. In this COVER
      instantiation every repeat-mode comparison is against -1 and folds to
      false, so only the plain per-scanline call remains live. */
5410FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
5411 scaled_nearest_scanline_sse2_8888_8888_OVER,
5412 uint32_t, uint32_t, NONE)
 /* Same expansion shape, instantiated for PIXMAN_REPEAT_NONE: scanlines with
    y outside [0, src_image->bits.height) and the left/right pad regions are
    fed from a static zero pixel (zero + 1 with vx = -pixman_fixed_e and
    unit_x = 0), passing fully_transparent_src = 1 so the scanline function
    returns immediately. */
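All of the generated main loops derive their stepping the same way: map the
first destination pixel centre through src_image->common.transform, read
unit_x and unit_y off the matrix diagonal, and back each coordinate off by
one fixed-point epsilon (the v.vector[0] -= ((pixman_fixed_t) 1) above). A
condensed sketch of that setup for a pure scale transform; the rounding
rationale in the comment is an assumption, and all names are hypothetical:

#include <stdint.h>

typedef int32_t fixed_16_16;
#define INT_TO_FIXED(i) ((fixed_16_16) ((uint32_t) (i) << 16))
#define FIXED_E         ((fixed_16_16) 1)  /* smallest 16.16 increment */

static void
nearest_setup_sketch (int src_x, int src_y,
                      fixed_16_16 scale_x, fixed_16_16 scale_y,
                      fixed_16_16 *vx, fixed_16_16 *vy)
{
    /* Sample at the pixel centre (x + 0.5), scale in 16.16, then subtract
       one epsilon, mirroring the expansion above. */
    *vx = (fixed_16_16) (((int64_t) (INT_TO_FIXED (src_x) + INT_TO_FIXED (1) / 2)
                          * scale_x) >> 16) - FIXED_E;
    *vy = (fixed_16_16) (((int64_t) (INT_TO_FIXED (src_y) + INT_TO_FIXED (1) / 2)
                          * scale_y) >> 16) - FIXED_E;
}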
5413 FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
5414 scaled_nearest_scanline_sse2_8888_8888_OVER,
5415 uint32_t, uint32_t, PAD)
static __inline__ __attribute__ ((__always_inline__)) void
scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (
    const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w,
    pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx,
    pixman_bool_t fully_transparent_src)
{
    scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x,
                                                 max_vx, fully_transparent_src);
}
static void
fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER (pixman_implementation_t *imp,
                                                       pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t op = info->op;
    __attribute__((unused)) pixman_image_t *src_image = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t src_x = info->src_x, src_y = info->src_y;
    __attribute__((unused)) int32_t mask_x = info->mask_x, mask_y = info->mask_y;
    __attribute__((unused)) int32_t dest_x = info->dest_x, dest_y = info->dest_y;
    __attribute__((unused)) int32_t width = info->width, height = info->height;
    uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line;
    int y;
    pixman_fixed_t src_width_fixed =
        ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16));
    pixman_fixed_t max_vy;
    pixman_vector_t v;
    pixman_fixed_t vx, vy, unit_x, unit_y;
    int32_t left_pad, right_pad;
    uint32_t *src, *dst;
    uint8_t solid_mask;
    const uint8_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    do {
        uint32_t *__bits__; int __stride__;
        __bits__ = dest_image->bits.bits;
        __stride__ = dest_image->bits.rowstride;
        (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
    } while (0);
    if (0)
    {
        if (0)
            solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        else
            do {
                uint32_t *__bits__; int __stride__;
                __bits__ = mask_image->bits.bits;
                __stride__ = mask_image->bits.rowstride;
                (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t);
                (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
            } while (0);
    }
    do {
        uint32_t *__bits__; int __stride__;
        __bits__ = src_image->bits.bits;
        __stride__ = src_image->bits.rowstride;
        (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
    } while (0);
    v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16))
                + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16))
                + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));
    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;
    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= ((pixman_fixed_t) 1);
    v.vector[1] -= ((pixman_fixed_t) 1);
    vx = v.vector[0];
    vy = v.vector[1];
    if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
    {
        max_vy = ((pixman_fixed_t) ((uint32_t) (src_image->bits.height) << 16));
        repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);
    }
    if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE)
    {
        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,
                                        &width, &left_pad, &right_pad);
        vx += left_pad * unit_x;
    }
    while (--height >= 0)
    {
        dst = dst_line;
        dst_line += dst_stride;
        if (0 && !0) { mask = mask_line; mask_line += mask_stride; }
        y = ((int) ((vy) >> 16));
        vy += unit_y;
        if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);
        if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD)
        {
            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);
            src = src_first_line + src_stride * y;
            if (left_pad > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (
                    mask, dst, src + src_image->bits.width - src_image->bits.width + 1,
                    left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 0);
            if (width > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (
                    mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->bits.width,
                    width, vx - src_width_fixed, unit_x, src_width_fixed, 0);
            if (right_pad > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (
                    mask + (0 ? 0 : left_pad + width), dst + left_pad + width,
                    src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1),
                    0, src_width_fixed, 0);
        }
        else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE)
        {
            static const uint32_t zero[1] = { 0 };
            if (y < 0 || y >= src_image->bits.height)
            {
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (
                    mask, dst, zero + 1, left_pad + width + right_pad,
                    -((pixman_fixed_t) 1), 0, src_width_fixed, 1);
                continue;
            }
            src = src_first_line + src_stride * y;
            if (left_pad > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (
                    mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0,
                    src_width_fixed, 1);
            if (width > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (
                    mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->bits.width,
                    width, vx - src_width_fixed, unit_x, src_width_fixed, 0);
            if (right_pad > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (
                    mask + (0 ? 0 : left_pad + width), dst + left_pad + width,
                    zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1);
        }
        else
        {
            src = src_first_line + src_stride * y;
            scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (
                mask, dst, src + src_image->bits.width, width,
                vx - src_width_fixed, unit_x, src_width_fixed, 0);
        }
    }
}
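Each FAST_NEAREST_MAINLOOP instantiation above compares its baked-in repeat mode against the PIXMAN_REPEAT_* constants, so the optimizer constant-folds the comparisons and each generated fast path keeps exactly one of the three row-handling bodies. A small sketch of that pattern, under assumed and heavily simplified names:

    #include <stdio.h>

    enum repeat_mode { REPEAT_NONE, REPEAT_PAD, REPEAT_NORMAL };

    #define MAKE_MAINLOOP(name, REPEAT)                                   \
    static void name (int y, int height)                                  \
    {                                                                     \
        (void) y;                                                         \
        if (REPEAT == REPEAT_PAD)                                         \
            printf ("%s: clamp y into [0, %d)\n", #name, height);         \
        else if (REPEAT == REPEAT_NONE)                                   \
            printf ("%s: rows outside [0, %d) become transparent\n",      \
                    #name, height);                                       \
        else                                                              \
            printf ("%s: wrap y modulo %d\n", #name, height);             \
    }

    MAKE_MAINLOOP (mainloop_pad,    REPEAT_PAD)
    MAKE_MAINLOOP (mainloop_none,   REPEAT_NONE)
    MAKE_MAINLOOP (mainloop_normal, REPEAT_NORMAL)

    int main (void)
    {
        mainloop_pad (5, 4);
        mainloop_none (5, 4);
        mainloop_normal (5, 4);
        return 0;
    }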
5416 FAST_NEAREST_MAINLOOP (sse2_8888_8888_normal_OVER,
5417 scaled_nearest_scanline_sse2_8888_8888_OVER,
5418 uint32_t, uint32_t, NORMAL)
static __inline__ __attribute__ ((__always_inline__)) void
scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (
    const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w,
    pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx,
    pixman_bool_t fully_transparent_src)
{
    scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x,
                                                 max_vx, fully_transparent_src);
}
static void
fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER (pixman_implementation_t *imp,
                                                          pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t op = info->op;
    __attribute__((unused)) pixman_image_t *src_image = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t src_x = info->src_x, src_y = info->src_y;
    __attribute__((unused)) int32_t mask_x = info->mask_x, mask_y = info->mask_y;
    __attribute__((unused)) int32_t dest_x = info->dest_x, dest_y = info->dest_y;
    __attribute__((unused)) int32_t width = info->width, height = info->height;
    uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line;
    int y;
    pixman_fixed_t src_width_fixed =
        ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16));
    pixman_fixed_t max_vy;
    pixman_vector_t v;
    pixman_fixed_t vx, vy, unit_x, unit_y;
    int32_t left_pad, right_pad;
    uint32_t *src, *dst;
    uint8_t solid_mask;
    const uint8_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    do {
        uint32_t *__bits__; int __stride__;
        __bits__ = dest_image->bits.bits;
        __stride__ = dest_image->bits.rowstride;
        (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
    } while (0);
    if (0)
    {
        if (0)
            solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        else
            do {
                uint32_t *__bits__; int __stride__;
                __bits__ = mask_image->bits.bits;
                __stride__ = mask_image->bits.rowstride;
                (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t);
                (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
            } while (0);
    }
    do {
        uint32_t *__bits__; int __stride__;
        __bits__ = src_image->bits.bits;
        __stride__ = src_image->bits.rowstride;
        (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
    } while (0);
    v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16))
                + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16))
                + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));
    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;
    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= ((pixman_fixed_t) 1);
    v.vector[1] -= ((pixman_fixed_t) 1);
    vx = v.vector[0];
    vy = v.vector[1];
    if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL)
    {
        max_vy = ((pixman_fixed_t) ((uint32_t) (src_image->bits.height) << 16));
        repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);
    }
    if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE)
    {
        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,
                                        &width, &left_pad, &right_pad);
        vx += left_pad * unit_x;
    }
    while (--height >= 0)
    {
        dst = dst_line;
        dst_line += dst_stride;
        if (0 && !0) { mask = mask_line; mask_line += mask_stride; }
        y = ((int) ((vy) >> 16));
        vy += unit_y;
        if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL)
            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);
        if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD)
        {
            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);
            src = src_first_line + src_stride * y;
            if (left_pad > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (
                    mask, dst, src + src_image->bits.width - src_image->bits.width + 1,
                    left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 0);
            if (width > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (
                    mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->bits.width,
                    width, vx - src_width_fixed, unit_x, src_width_fixed, 0);
            if (right_pad > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (
                    mask + (0 ? 0 : left_pad + width), dst + left_pad + width,
                    src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1),
                    0, src_width_fixed, 0);
        }
        else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE)
        {
            static const uint32_t zero[1] = { 0 };
            if (y < 0 || y >= src_image->bits.height)
            {
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (
                    mask, dst, zero + 1, left_pad + width + right_pad,
                    -((pixman_fixed_t) 1), 0, src_width_fixed, 1);
                continue;
            }
            src = src_first_line + src_stride * y;
            if (left_pad > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (
                    mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0,
                    src_width_fixed, 1);
            if (width > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (
                    mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->bits.width,
                    width, vx - src_width_fixed, unit_x, src_width_fixed, 0);
            if (right_pad > 0)
                scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (
                    mask + (0 ? 0 : left_pad + width), dst + left_pad + width,
                    zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1);
        }
        else
        {
            src = src_first_line + src_stride * y;
            scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (
                mask, dst, src + src_image->bits.width, width,
                vx - src_width_fixed, unit_x, src_width_fixed, 0);
        }
    }
}
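In the NORMAL instantiation the live branches are the repeat (PIXMAN_REPEAT_NORMAL, ...) reductions of vx and vy. repeat itself is defined elsewhere in this file; one plausible scalar reduction into [0, max), analogous to the subtraction loops the scanline code below uses for vx, would be:

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;

    /* hypothetical stand-in for repeat (PIXMAN_REPEAT_NORMAL, &v, max) */
    static void repeat_normal (pixman_fixed_t *v, pixman_fixed_t max)
    {
        while (*v >= max) *v -= max;   /* wrap coordinates past the right edge */
        while (*v < 0)    *v += max;   /* wrap coordinates left of the image   */
    }

    int main (void)
    {
        pixman_fixed_t w  = 4 << 16;        /* image 4 pixels wide, in 16.16  */
        pixman_fixed_t vx = (9 << 16) / 2;  /* 4.5 pixels, one wrap past edge */
        repeat_normal (&vx, w);
        printf ("vx wraps to %g pixels\n", vx / 65536.0);  /* prints 0.5 */
        return 0;
    }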
5419
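All coordinates in these fast paths are 16.16 fixed point: pixman_fixed_to_int is a right shift by 16, and the + fixed_1 / 2 added to v.vector[0] and v.vector[1] above starts sampling at the pixel centre. A short self-contained illustration (the helper names are illustrative; only the shifts come from the code):

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;
    #define FIXED_1 ((pixman_fixed_t) (1 << 16))

    static pixman_fixed_t int_to_fixed (int x) { return (pixman_fixed_t) ((uint32_t) x << 16); }
    static int fixed_to_int (pixman_fixed_t v) { return (int) (v >> 16); }

    int main (void)
    {
        /* which source pixel does destination pixel 3 sample at a 2x upscale? */
        pixman_fixed_t unit_x = FIXED_1 / 2;                 /* step of 0.5 px */
        pixman_fixed_t vx = int_to_fixed (0) + FIXED_1 / 2;  /* centre of px 0 */
        for (int i = 0; i < 3; i++)
            vx += unit_x;
        printf ("dst pixel 3 samples src pixel %d\n", fixed_to_int (vx)); /* 2 */
        return 0;
    }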
5420 static force_inline /* __inline__ __attribute__ ((__always_inline__)) */ void
5421scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
5422 uint32_t * dst,
5423 const uint32_t * src,
5424 int32_t w,
5425 pixman_fixed_t vx,
5426 pixman_fixed_t unit_x,
5427 pixman_fixed_t src_width_fixed,
5428 pixman_bool_t zero_src)
5429{
5430 __m128i xmm_mask;
5431 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
5432 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5433 __m128i xmm_alpha_lo, xmm_alpha_hi;
5434
5435 if (zero_src || (*mask >> 24) == 0)
5436 return;
5437
5438 xmm_mask = create_mask_16_128 (*mask >> 24);
5439
5440 while (w && (uintptr_t)dst & 15)
5441 {
5442         uint32_t s = *(src + pixman_fixed_to_int (vx) /* ((int) ((vx) >> 16)) */);
5443 vx += unit_x;
5444 while (vx >= 0)
5445 vx -= src_width_fixed;
5446
5447 if (s)
5448 {
5449 uint32_t d = *dst;
5450
5451 __m128i ms = unpack_32_1x128 (s);
5452 __m128i alpha = expand_alpha_1x128 (ms);
5453 __m128i dest = xmm_mask;
5454 __m128i alpha_dst = unpack_32_1x128 (d);
5455
5456 *dst = pack_1x128_32 (
5457 in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
5458 }
5459 dst++;
5460 w--;
5461 }
5462
5463 while (w >= 4)
5464 {
5465 uint32_t tmp1, tmp2, tmp3, tmp4;
5466
5467         tmp1 = *(src + pixman_fixed_to_int (vx) /* ((int) ((vx) >> 16)) */);
5468 vx += unit_x;
5469 while (vx >= 0)
5470 vx -= src_width_fixed;
5471         tmp2 = *(src + pixman_fixed_to_int (vx) /* ((int) ((vx) >> 16)) */);
5472 vx += unit_x;
5473 while (vx >= 0)
5474 vx -= src_width_fixed;
5475         tmp3 = *(src + pixman_fixed_to_int (vx) /* ((int) ((vx) >> 16)) */);
5476 vx += unit_x;
5477 while (vx >= 0)
5478 vx -= src_width_fixed;
5479         tmp4 = *(src + pixman_fixed_to_int (vx) /* ((int) ((vx) >> 16)) */);
5480 vx += unit_x;
5481 while (vx >= 0)
5482 vx -= src_width_fixed;
5483
5484 xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
5485
5486 if (!is_zero (xmm_src))
5487 {
5488 xmm_dst = load_128_aligned ((__m128i*)dst);
5489
5490 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5491 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5492 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
5493 &xmm_alpha_lo, &xmm_alpha_hi);
5494
5495 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
5496 &xmm_alpha_lo, &xmm_alpha_hi,
5497 &xmm_mask, &xmm_mask,
5498 &xmm_dst_lo, &xmm_dst_hi);
5499
5500 save_128_aligned (
5501 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5502 }
5503
5504 dst += 4;
5505 w -= 4;
5506 }
5507
5508 while (w)
5509 {
5510         uint32_t s = *(src + pixman_fixed_to_int (vx) /* ((int) ((vx) >> 16)) */);
5511 vx += unit_x;
5512 while (vx >= 0)
5513 vx -= src_width_fixed;
5514
5515 if (s)
5516 {
5517 uint32_t d = *dst;
5518
5519 __m128i ms = unpack_32_1x128 (s);
5520 __m128i alpha = expand_alpha_1x128 (ms);
5521 __m128i mask = xmm_mask;
5522 __m128i dest = unpack_32_1x128 (d);
5523
5524 *dst = pack_1x128_32 (
5525 in_over_1x128 (&ms, &alpha, &mask, &dest));
5526 }
5527
5528 dst++;
5529 w--;
5530 }
5531
5532}
5533
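The scanline above follows the usual SSE2 head/body/tail shape: scalar pixels until dst reaches 16-byte alignment, four pixels per aligned 128-bit iteration, then the scalar remainder. A stripped-down sketch of just that control flow, with a placeholder standing in for the real load/blend/store:

    #include <stdint.h>
    #include <stdio.h>

    static void process_one (uint32_t *dst) { *dst += 1; /* placeholder blend */ }

    static void scanline (uint32_t *dst, int32_t w)
    {
        while (w && (uintptr_t) dst & 15)   /* head: walk up to 16-byte alignment */
        {
            process_one (dst);
            dst++; w--;
        }
        while (w >= 4)                      /* body: one aligned 128-bit step     */
        {
            process_one (dst);     process_one (dst + 1);
            process_one (dst + 2); process_one (dst + 3);
            dst += 4; w -= 4;
        }
        while (w)                           /* tail: leftover pixels              */
        {
            process_one (dst);
            dst++; w--;
        }
    }

    int main (void)
    {
        uint32_t buf[40] = { 0 };
        scanline (buf + 1, 38);             /* deliberately misaligned start */
        printf ("untouched: %u, processed: %u\n",
                (unsigned) buf[0], (unsigned) buf[38]);   /* 0 and 1 */
        return 0;
    }

Every pixel is processed exactly once whatever the starting alignment; only the body loop gets to use aligned 128-bit loads and stores.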
5534 FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
5535 scaled_nearest_scanline_sse2_8888_n_8888_OVER,
5536 uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
static void
fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER (pixman_implementation_t *imp,
                                                           pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t op = info->op;
    __attribute__((unused)) pixman_image_t *src_image = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t src_x = info->src_x, src_y = info->src_y;
    __attribute__((unused)) int32_t mask_x = info->mask_x, mask_y = info->mask_y;
    __attribute__((unused)) int32_t dest_x = info->dest_x, dest_y = info->dest_y;
    __attribute__((unused)) int32_t width = info->width, height = info->height;
    uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line;
    int y;
    pixman_fixed_t src_width_fixed =
        ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16));
    pixman_fixed_t max_vy;
    pixman_vector_t v;
    pixman_fixed_t vx, vy, unit_x, unit_y;
    int32_t left_pad, right_pad;
    uint32_t *src, *dst;
    uint32_t solid_mask;
    const uint32_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    do {
        uint32_t *__bits__; int __stride__;
        __bits__ = dest_image->bits.bits;
        __stride__ = dest_image->bits.rowstride;
        (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
    } while (0);
    if (1)
    {
        if (1)
            solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        else
            do {
                uint32_t *__bits__; int __stride__;
                __bits__ = mask_image->bits.bits;
                __stride__ = mask_image->bits.rowstride;
                (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
                (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
            } while (0);
    }
    do {
        uint32_t *__bits__; int __stride__;
        __bits__ = src_image->bits.bits;
        __stride__ = src_image->bits.rowstride;
        (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
    } while (0);
    v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16))
                + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16))
                + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));
    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;
    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= ((pixman_fixed_t) 1);
    v.vector[1] -= ((pixman_fixed_t) 1);
    vx = v.vector[0];
    vy = v.vector[1];
    if (-1 == PIXMAN_REPEAT_NORMAL)
    {
        max_vy = ((pixman_fixed_t) ((uint32_t) (src_image->bits.height) << 16));
        repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);
    }
    if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE)
    {
        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,
                                        &width, &left_pad, &right_pad);
        vx += left_pad * unit_x;
    }
    while (--height >= 0)
    {
        dst = dst_line;
        dst_line += dst_stride;
        if (1 && !1) { mask = mask_line; mask_line += mask_stride; }
        y = ((int) ((vy) >> 16));
        vy += unit_y;
        if (-1 == PIXMAN_REPEAT_NORMAL)
            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);
        if (-1 == PIXMAN_REPEAT_PAD)
        {
            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);
            src = src_first_line + src_stride * y;
            if (left_pad > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask, dst, src + src_image->bits.width - src_image->bits.width + 1,
                    left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 0);
            if (width > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->bits.width,
                    width, vx - src_width_fixed, unit_x, src_width_fixed, 0);
            if (right_pad > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask + (1 ? 0 : left_pad + width), dst + left_pad + width,
                    src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1),
                    0, src_width_fixed, 0);
        }
        else if (-1 == PIXMAN_REPEAT_NONE)
        {
            static const uint32_t zero[1] = { 0 };
            if (y < 0 || y >= src_image->bits.height)
            {
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask, dst, zero + 1, left_pad + width + right_pad,
                    -((pixman_fixed_t) 1), 0, src_width_fixed, 1);
                continue;
            }
            src = src_first_line + src_stride * y;
            if (left_pad > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0,
                    src_width_fixed, 1);
            if (width > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->bits.width,
                    width, vx - src_width_fixed, unit_x, src_width_fixed, 0);
            if (right_pad > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask + (1 ? 0 : left_pad + width), dst + left_pad + width,
                    zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1);
        }
        else
        {
            src = src_first_line + src_stride * y;
            scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                mask, dst, src + src_image->bits.width, width,
                vx - src_width_fixed, unit_x, src_width_fixed, 0);
        }
    }
}
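The n_8888 paths fetch the mask once with _pixman_image_get_solid and only ever use its alpha byte: (*mask >> 24) == 0 makes the scanline return early, and create_mask_16_128 broadcasts that byte otherwise. A scalar sketch of the preparation step (the struct is a stand-in for the real __m128i, and the helper name is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint16_t lane[8]; } mask_16x8;

    /* returns 0 when the solid mask is fully transparent (skip the scanline) */
    static int prepare_solid_mask (uint32_t solid, mask_16x8 *out)
    {
        uint32_t a = solid >> 24;            /* alpha byte of the a8r8g8b8 mask */
        if (a == 0)
            return 0;
        for (int i = 0; i < 8; i++)
            out->lane[i] = (uint16_t) a;     /* broadcast, like create_mask_16_128 */
        return 1;
    }

    int main (void)
    {
        mask_16x8 m;
        printf ("opaque red: %d\n", prepare_solid_mask (0xffff0000u, &m));  /* 1 */
        printf ("transparent: %d\n", prepare_solid_mask (0x00ff0000u, &m)); /* 0 */
        return 0;
    }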
5537 FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
static void
fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER (pixman_implementation_t *imp,
                                                         pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t op = info->op;
    __attribute__((unused)) pixman_image_t *src_image = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t src_x = info->src_x, src_y = info->src_y;
    __attribute__((unused)) int32_t mask_x = info->mask_x, mask_y = info->mask_y;
    __attribute__((unused)) int32_t dest_x = info->dest_x, dest_y = info->dest_y;
    __attribute__((unused)) int32_t width = info->width, height = info->height;
    uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line;
    int y;
    pixman_fixed_t src_width_fixed =
        ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16));
    pixman_fixed_t max_vy;
    pixman_vector_t v;
    pixman_fixed_t vx, vy, unit_x, unit_y;
    int32_t left_pad, right_pad;
    uint32_t *src, *dst;
    uint32_t solid_mask;
    const uint32_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    do {
        uint32_t *__bits__; int __stride__;
        __bits__ = dest_image->bits.bits;
        __stride__ = dest_image->bits.rowstride;
        (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
    } while (0);
    if (1)
    {
        if (1)
            solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        else
            do {
                uint32_t *__bits__; int __stride__;
                __bits__ = mask_image->bits.bits;
                __stride__ = mask_image->bits.rowstride;
                (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
                (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
            } while (0);
    }
    do {
        uint32_t *__bits__; int __stride__;
        __bits__ = src_image->bits.bits;
        __stride__ = src_image->bits.rowstride;
        (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
    } while (0);
    v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16))
                + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16))
                + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));
    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;
    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= ((pixman_fixed_t) 1);
    v.vector[1] -= ((pixman_fixed_t) 1);
    vx = v.vector[0];
    vy = v.vector[1];
    if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
    {
        max_vy = ((pixman_fixed_t) ((uint32_t) (src_image->bits.height) << 16));
        repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);
    }
    if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE)
    {
        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,
                                        &width, &left_pad, &right_pad);
        vx += left_pad * unit_x;
    }
    while (--height >= 0)
    {
        dst = dst_line;
        dst_line += dst_stride;
        if (1 && !1) { mask = mask_line; mask_line += mask_stride; }
        y = ((int) ((vy) >> 16));
        vy += unit_y;
        if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);
        if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD)
        {
            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);
            src = src_first_line + src_stride * y;
            if (left_pad > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask, dst, src + src_image->bits.width - src_image->bits.width + 1,
                    left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 0);
            if (width > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->bits.width,
                    width, vx - src_width_fixed, unit_x, src_width_fixed, 0);
            if (right_pad > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask + (1 ? 0 : left_pad + width), dst + left_pad + width,
                    src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1),
                    0, src_width_fixed, 0);
        }
        else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE)
        {
            static const uint32_t zero[1] = { 0 };
            if (y < 0 || y >= src_image->bits.height)
            {
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask, dst, zero + 1, left_pad + width + right_pad,
                    -((pixman_fixed_t) 1), 0, src_width_fixed, 1);
                continue;
            }
            src = src_first_line + src_stride * y;
            if (left_pad > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0,
                    src_width_fixed, 1);
            if (width > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->bits.width,
                    width, vx - src_width_fixed, unit_x, src_width_fixed, 0);
            if (right_pad > 0)
                scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                    mask + (1 ? 0 : left_pad + width), dst + left_pad + width,
                    zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1);
        }
        else
        {
            src = src_first_line + src_stride * y;
            scaled_nearest_scanline_sse2_8888_n_8888_OVER (
                mask, dst, src + src_image->bits.width, width,
                vx - src_width_fixed, unit_x, src_width_fixed, 0);
        }
    }
}
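In the PAD expansion above, repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height) clamps the vertical source coordinate, so destination rows that map above or below the source re-sample its first or last row. The horizontal pads use the same idea: the left pad passes src + 1 with a coordinate of -((pixman_fixed_t) 1) and a zero increment, so every fetch reads src[-1 + 1] == src[0], the edge pixel. A minimal standalone sketch of the vertical clamp (the repeat_pad helper below is hypothetical, not pixman's repeat()):

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;                       /* 16.16 fixed point */
    #define pixman_int_to_fixed(i) ((pixman_fixed_t) ((uint32_t) (i) << 16))
    #define pixman_fixed_to_int(f) ((int) ((f) >> 16))

    /* PAD repeat: coordinates outside [0, size) are clamped to the edge */
    static int repeat_pad (int v, int size)
    {
        if (v < 0)
            return 0;
        if (v >= size)
            return size - 1;
        return v;
    }

    int main (void)
    {
        int height = 4;                                   /* 4-row source image */
        pixman_fixed_t vy = pixman_int_to_fixed (-2);     /* start above the image */
        pixman_fixed_t unit_y = pixman_int_to_fixed (1);  /* identity vertical scale */

        for (int row = 0; row < 8; row++)
        {
            int y = repeat_pad (pixman_fixed_to_int (vy), height);
            printf ("dest row %d samples src row %d\n", row, y);
            vy += unit_y;
        }
        return 0;
    }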
5540 FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
5541  scaled_nearest_scanline_sse2_8888_n_8888_OVER,
5542  uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
/* expands to: */
static void fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
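The NONE expansion handles out-of-bounds areas by compositing from a transparent source. The scanline function fetches src[vx >> 16]; with vx = -((pixman_fixed_t) 1) and a zero increment, vx >> 16 is -1 on the usual arithmetic-right-shift targets, so passing zero + 1 makes every fetch read zero[0], a fully transparent pixel. A standalone sketch of that trick (not pixman's scanline function; assumes arithmetic right shift of negative values):

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;

    int main (void)
    {
        static const uint32_t zero[1] = { 0 };
        const uint32_t *src = zero + 1;
        pixman_fixed_t vx = -((pixman_fixed_t) 1);   /* -1/65536 in 16.16 */
        pixman_fixed_t unit_x = 0;                   /* never advances */

        for (int i = 0; i < 4; i++)
        {
            uint32_t pixel = src[vx >> 16];          /* always reads zero[0] */
            printf ("padding pixel %d = %08x\n", i, (unsigned) pixel);
            vx += unit_x;
        }
        return 0;
    }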
5543 FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
5544  scaled_nearest_scanline_sse2_8888_n_8888_OVER,
5545  uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
/* expands to: */
static void fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { max_vy =
((pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL ==
PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 };
if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
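The NORMAL expansion instead wraps the coordinates modulo the source extent (max_vy is the source height in 16.16 fixed point), so the image tiles indefinitely. A scalar model of that wrap (repeat_normal below is hypothetical, mirroring what repeat (PIXMAN_REPEAT_NORMAL, ...) computes):

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;
    #define pixman_int_to_fixed(i) ((pixman_fixed_t) ((uint32_t) (i) << 16))

    /* hypothetical scalar model of repeat (PIXMAN_REPEAT_NORMAL, &v, max) */
    static void repeat_normal (pixman_fixed_t *v, pixman_fixed_t max)
    {
        *v %= max;
        if (*v < 0)
            *v += max;   /* C's % truncates toward zero; fix up negatives */
    }

    int main (void)
    {
        pixman_fixed_t max_vy = pixman_int_to_fixed (4);   /* 4-row source */
        pixman_fixed_t vy = pixman_int_to_fixed (-3);      /* start above */

        for (int row = 0; row < 6; row++)
        {
            pixman_fixed_t t = vy;
            repeat_normal (&t, max_vy);
            printf ("dest row %d wraps to src row %d\n", row, (int) (t >> 16));
            vy += pixman_int_to_fixed (1);
        }
        return 0;
    }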
5546
5547 #if PSHUFD_IS_FAST /* expands to 0, so the #else variant below is compiled */
5548
5549 /***********************************************************************************/
5550
5551 # define BILINEAR_DECLARE_VARIABLES \
5552 const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
5553 const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
5554 const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
5555 const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
5556 unit_x, -unit_x, unit_x, -unit_x); \
5557 const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \
5558 unit_x * 4, -unit_x * 4, \
5559 unit_x * 4, -unit_x * 4, \
5560 unit_x * 4, -unit_x * 4); \
5561 const __m128i xmm_zero = _mm_setzero_si128 (); \
5562 __m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3, \
5563 vx + unit_x * 2, -(vx + 1) - unit_x * 2, \
5564 vx + unit_x * 1, -(vx + 1) - unit_x * 1, \
5565 vx + unit_x * 0, -(vx + 1) - unit_x * 0); \
5566 __m128i xmm_wh_state;
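The interleaved (vx, -(vx + 1)) lanes of xmm_x, combined with the alternating (0, 1) lanes of xmm_addc, let a single 16-bit logical shift (_mm_srli_epi16 by 16 - BILINEAR_INTERPOLATION_BITS) produce a pair of complementary 7-bit filter weights that always sum to 128. A scalar demonstration of the identity (standalone sketch, not pixman code):

    #include <stdint.h>
    #include <stdio.h>

    int main (void)
    {
        int32_t vx = 0x00018A00;                   /* 16.16 fixed point, x ~ 1.539 */
        uint16_t lane_r = (uint16_t) vx;           /* lane holding vx */
        uint16_t lane_l = (uint16_t) (-(vx + 1));  /* lane holding -(vx + 1) */

        unsigned wr = (lane_r >> 9) + 0;           /* 9 == 16 - 7; xmm_addc adds 0 */
        unsigned wl = (lane_l >> 9) + 1;           /* xmm_addc adds 1 here */

        /* -(vx + 1) is ~vx in two's complement, so wl + wr == 128 always */
        printf ("wl = %u, wr = %u, sum = %u\n", wl, wr, wl + wr);
        return 0;
    }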
5567
5568 #define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_) \
5569 do { \
5570 int phase = phase_; \
5571 __m128i xmm_wh, xmm_a, xmm_b; \
5572 /* fetch 2x2 pixel block into sse2 registers */ \
5573 __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \
5574 __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \
5575 vx += unit_x; \
5576 /* vertical interpolation */ \
5577 xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \
5578 xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \
5579 xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \
5580 /* calculate horizontal weights */ \
5581 if (phase <= 0) \
5582 { \
5583 xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
5584 16 - BILINEAR_INTERPOLATION_BITS /* 7 */)); \
5585 xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4); \
5586 phase = 0; \
5587 } \
5588     xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase, \
5589                                                            phase, phase)); \
5590 /* horizontal interpolation */ \
5591     xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \
5592                             xmm_a, _MM_SHUFFLE (1, 0, 3, 2)), xmm_a), xmm_wh); \
5593 /* shift the result */ \
5594     pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS /* 7 */ * 2); \
5595 } while (0)
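The phase_ machinery above exists so the weight computation is done once per group of four pixels: phase 0 fills xmm_wh_state with the weight pairs for phases 0..3, and each pixel then broadcasts its own 32-bit weight pair with _mm_shuffle_epi32. A minimal illustration of that broadcast (standalone; compile with -msse2):

    #include <emmintrin.h>
    #include <stdio.h>

    int main (void)
    {
        __m128i state = _mm_set_epi32 (4, 3, 2, 1);   /* 32-bit lanes 3..0 */
        /* the shuffle immediate must be a compile-time constant */
        __m128i wh0 = _mm_shuffle_epi32 (state, _MM_SHUFFLE (0, 0, 0, 0));
        __m128i wh2 = _mm_shuffle_epi32 (state, _MM_SHUFFLE (2, 2, 2, 2));
        int out0[4], out2[4];
        _mm_storeu_si128 ((__m128i *) out0, wh0);
        _mm_storeu_si128 ((__m128i *) out2, wh2);
        printf ("%d %d %d %d\n", out0[0], out0[1], out0[2], out0[3]); /* 1 1 1 1 */
        printf ("%d %d %d %d\n", out2[0], out2[1], out2[2], out2[3]); /* 3 3 3 3 */
        return 0;
    }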
5596
5597 #else /************************************************************************/
5598
5599 # define BILINEAR_DECLARE_VARIABLES \
5600 const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
5601 const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
5602 const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
5603 const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
5604 unit_x, -unit_x, unit_x, -unit_x); \
5605 const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \
5606 unit_x * 4, -unit_x * 4, \
5607 unit_x * 4, -unit_x * 4, \
5608 unit_x * 4, -unit_x * 4); \
5609 const __m128i xmm_zero = _mm_setzero_si128 (); \
5610 __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \
5611 vx, -(vx + 1), vx, -(vx + 1))
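This non-PSHUFD variant keeps only the current pixel's weight pair in xmm_x and advances it by xmm_ux1 after every pixel instead of batching four phases. xmm_ux4 is still declared so both variants expose the same names, which is why the helper below casts it to (void) to silence the unused-variable warning.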
5612
5613 #define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase) \
5614 do { \
5615 __m128i xmm_wh, xmm_a, xmm_b; \
5616 /* fetch 2x2 pixel block into sse2 registers */ \
5617 __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \
5618 __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \
5619 (void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */ \
5620 vx += unit_x; \
5621 /* vertical interpolation */ \
5622 xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \
5623 xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \
5624 xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \
5625 /* calculate horizontal weights */ \
5626 xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
5627                 16 - BILINEAR_INTERPOLATION_BITS /* 7 */)); \
5628 xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \
5629 /* horizontal interpolation */ \
5630 xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a); \
5631 xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); \
5632 /* shift the result */ \
5633     pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS /* 7 */ * 2); \
5634 } while (0)
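In both variants the horizontal step relies on _mm_madd_epi16, which multiplies adjacent 16-bit lanes and sums each pair, so a single instruction evaluates the two-tap filter a0*w0 + a1*w1 per 32-bit lane. A standalone illustration with hand-picked channel values and weights (compile with -msse2):

    #include <emmintrin.h>
    #include <stdio.h>

    int main (void)
    {
        /* two pixel pairs' channel values interleaved with their weights */
        __m128i vals    = _mm_set_epi16 (0, 0, 0, 0, 200, 100, 200, 100);
        __m128i weights = _mm_set_epi16 (0, 0, 0, 0,  64,  64, 128,   0);
        __m128i acc = _mm_madd_epi16 (vals, weights);   /* per pair: v0*w0 + v1*w1 */
        int out[4];
        _mm_storeu_si128 ((__m128i *) out, acc);
        /* weights sum to 128 per pair, so shift by 7 normalizes */
        printf ("%d %d\n", out[0] >> 7, out[1] >> 7);   /* 200, 150 */
        return 0;
    }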
5635
5636 /***********************************************************************************/
5637
5638 #endif
5639
5640 #define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
5641 do { \
5642 __m128i xmm_pix; \
5643     BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1); \
5644 xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix); \
5645 xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); \
5646 pix = _mm_cvtsi128_si32 (xmm_pix); \
5647 } while(0)
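Per channel, BILINEAR_INTERPOLATE_ONE_PIXEL therefore reduces to an ordinary fixed-point bilinear filter with 7-bit weights (BILINEAR_INTERPOLATION_BITS == 7): a vertical blend with wt/wb, a horizontal blend with the complementary pair, and a final shift by 14. A scalar reference model (a sketch with a hypothetical bilinear_channel helper, not pixman's code):

    #include <stdint.h>
    #include <stdio.h>

    #define BITS 7   /* BILINEAR_INTERPOLATION_BITS */

    /* weights sum to 1 << BITS == 128 in each direction */
    static uint32_t bilinear_channel (uint32_t tl, uint32_t tr,
                                      uint32_t bl, uint32_t br,
                                      unsigned wt, unsigned wb,
                                      unsigned wl, unsigned wr)
    {
        uint32_t left  = tl * wt + bl * wb;   /* vertical pass; fits in 16 bits */
        uint32_t right = tr * wt + br * wb;
        return (left * wl + right * wr) >> (2 * BITS);   /* horizontal + shift */
    }

    int main (void)
    {
        /* sample the exact centre of a 2x2 block: all four weights are 64 */
        printf ("%u\n", bilinear_channel (0, 100, 100, 200, 64, 64, 64, 64)); /* 100 */
        return 0;
    }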
5648
5649 #define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix) \
5650 do { \
5651 __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; \
5652 BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0); \
5653 BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1); \
5654 BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2); \
5655 BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3); \
5656 xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2); \
5657 xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); \
5658 pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3); \
5659} while(0)
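/* BILINEAR_INTERPOLATE_FOUR_PIXELS: four consecutive one-pixel
   interpolations, packed pairwise into a single __m128i that holds
   four a8r8g8b8 pixels. */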
5660
5661#define BILINEAR_SKIP_ONE_PIXEL() \
5662do { \
5663 vx += unit_x; \
5664 xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \
5665} while(0)
5666
5667#define BILINEAR_SKIP_FOUR_PIXELS() \
5668do { \
5669 vx += unit_x * 4; \
5670 xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4); \
5671} while(0)
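/* The SKIP macros advance the 16.16 fixed-point x coordinate (vx) and the
   packed horizontal weight vector (xmm_x) by one and four pixel steps
   respectively, without producing any output. */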
5672
5673/***********************************************************************************/
5674
5675static force_inline void
5676scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
5677 const uint32_t * mask,
5678 const uint32_t * src_top,
5679 const uint32_t * src_bottom,
5680 int32_t w,
5681 int wt,
5682 int wb,
5683 pixman_fixed_t vx_,
5684 pixman_fixed_t unit_x_,
5685 pixman_fixed_t max_vx,
5686 pixman_bool_t zero_src)
5687{
5688 intptr_t vx = vx_;
5689 intptr_t unit_x = unit_x_;
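/* BILINEAR_DECLARE_VARIABLES sets up the SSE2 working set: the vertical
   weight vectors xmm_wt/xmm_wb, the rounding constant xmm_addc, the
   one-pixel and four-pixel x-step vectors xmm_ux1/xmm_ux4, xmm_zero,
   and the running horizontal weight vector xmm_x. */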
5690 BILINEAR_DECLARE_VARIABLES;
5691 uint32_t pix1, pix2;
5692
5693 while (w && ((uintptr_t)dst & 15))
5694 {
5695 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
5696 *dst++ = pix1;
5697 w--;
5698 }
5699
5700 while ((w -= 4) >= 0) {
5701 __m128i xmm_src;
5702 BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
5703 _mm_store_si128 ((__m128i *)dst, xmm_src);
5704 dst += 4;
5705 }
5706
5707 if (w & 2)
5708 {
5709 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
5710 BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
5711 *dst++ = pix1;
5712 *dst++ = pix2;
5713 }
5714
5715 if (w & 1)
5716 {
5717 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
5718 *dst = pix1;
5719 }
5720
5721}
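/* Scanline structure: a scalar prologue interpolates single pixels until
   dst reaches 16-byte alignment, the main loop then emits four pixels per
   iteration with aligned 128-bit stores, and the 2-pixel and 1-pixel
   tails drain the remaining width. */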
5722
5723FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
5724 scaled_bilinear_scanline_sse2_8888_8888_SRC,
5725 uint32_t, uint32_t, uint32_t,
5726 COVER, FLAG_NONE)
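/* Each FAST_BILINEAR_MAINLOOP_COMMON invocation expands to a complete
   fast_composite_scaled_bilinear_sse2_* entry point that walks the
   destination rows, clamps y according to the repeat mode (COVER or PAD
   here), and dispatches runs of pixels to the scanline function named
   in its second argument. */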
5727FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 <<
1)) { solid_mask = _pixman_image_get_solid (imp, mask_image,
dest_image->bits.format); mask_stride = 0; } else if ((0)
& (1 << 2)) { do { uint32_t *__bits__; int __stride__
; __bits__ = mask_image->bits.bits; __stride__ = mask_image
->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof
(uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while
(0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) *
(0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t
) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t
) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits
.width, v.vector[0], unit_x, &left_pad, &left_tz, &
width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD ==
PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz
; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x;
} if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector
[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t)
((uint32_t) (src_image->bits.width) << 16))); max_x
= ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16))
+ 1; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; }
        y1 = (int) (vy >> 16);
        weight2 = pixman_fixed_to_bilinear_weight (vy);
        if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; }
        else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; }
        vy += unit_y;

        if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2];
            uint32_t buf2[2];
            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = src1[0];
                buf2[0] = buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
            }
        }
        else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2];
            uint32_t buf2[2];
            if (y1 < 0) { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0) { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (left_tz > 0)
            {
                buf1[0] = 0; buf1[1] = src1[0];
                buf2[0] = 0; buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1), unit_x, 0, 0);
                dst += left_tz;
                if ((0) & (1 << 2)) mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0)
            {
                buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1), unit_x, 0, 0);
                dst += right_tz;
                if ((0) & (1 << 2)) mask += right_tz;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
            }
        }
        else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
        {
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t *src_line_top;
            uint32_t *src_line_bottom;
            uint32_t buf1[2];
            uint32_t buf2[2];
            uint32_t extended_src_line0[64 * 2];
            uint32_t extended_src_line1[64 * 2];
            int i, j;
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;
            if (need_src_extension)
            {
                for (i = 0; i < src_width;)
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                    {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }
            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;
            while (width_remain > 0)
            {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if ((int) (vx >> 16) == src_width - 1)
                {
                    num_pixels = ((src_width_fixed - vx - (pixman_fixed_t) 1) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1), unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if ((int) (vx >> 16) != src_width - 1 && width_remain > 0)
                {
                    num_pixels = ((src_width_fixed - ((pixman_fixed_t) ((uint32_t) 1 << 16)) - vx - (pixman_fixed_t) 1) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                }
            }
        }
        else
        {
            scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
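A note on the vertical interpolation setup that recurs at the top of each scanline iteration above: y1/y2 are the two source rows and weight1/weight2 their blend weights, which always sum to 128. The following is a minimal standalone sketch, assuming the 7-bit interpolation precision implied by the 1 << 7 literals; fixed_to_bilinear_weight and vertical_weights are hypothetical names for illustration, not pixman's API.

#include <stdint.h>

typedef int32_t pixman_fixed_t;  /* 16.16 fixed point */

/* Hypothetical equivalent of pixman_fixed_to_bilinear_weight: the top
   7 bits of the 16-bit fractional part (assuming 7-bit precision). */
static int
fixed_to_bilinear_weight (pixman_fixed_t x)
{
    return (x >> (16 - 7)) & ((1 << 7) - 1);
}

/* Select the two source rows and their weights for one destination row. */
static void
vertical_weights (pixman_fixed_t vy, int *y1, int *y2, int *w1, int *w2)
{
    *y1 = (int) (vy >> 16);               /* integer part: top row        */
    *w2 = fixed_to_bilinear_weight (vy);  /* fractional part: bottom row  */
    if (*w2)
    {
        *y2 = *y1 + 1;
        *w1 = (1 << 7) - *w2;             /* weights sum to 1 << 7 == 128 */
    }
    else
    {
        *y2 = *y1;                        /* exactly on a row: even split */
        *w1 = *w2 = (1 << 7) / 2;
    }
}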
5728               scaled_bilinear_scanline_sse2_8888_8888_SRC,
5729               uint32_t, uint32_t, uint32_t,
5730               PAD, FLAG_NONE)
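The same macro expands again below for the NONE repeat variant. One recurring idiom worth decoding first: the argument vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1) passed to the scanline calls keeps only the fractional part of a 16.16 fixed-point coordinate. A sketch, with PIXMAN_FIXED_1 and fixed_frac as illustrative names only:

#include <stdint.h>

typedef int32_t pixman_fixed_t;  /* 16.16 fixed point */
#define PIXMAN_FIXED_1 ((pixman_fixed_t) ((uint32_t) 1 << 16))

/* Fractional part of a fixed-point coordinate: masking with
   PIXMAN_FIXED_1 - 1 == 0xffff clears the integer bits, so the scanline
   function starts from a sub-pixel offset in [0, 1). */
static pixman_fixed_t
fixed_frac (pixman_fixed_t x)
{
    return x & (PIXMAN_FIXED_1 - 1);
}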
5731 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,

static void
fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC (pixman_implementation_t *imp,
                                                        pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t     op         = info->op;
    __attribute__((unused)) pixman_image_t *src_image  = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t         src_x      = info->src_x;
    __attribute__((unused)) int32_t         src_y      = info->src_y;
    __attribute__((unused)) int32_t         mask_x     = info->mask_x;
    __attribute__((unused)) int32_t         mask_y     = info->mask_y;
    __attribute__((unused)) int32_t         dest_x     = info->dest_x;
    __attribute__((unused)) int32_t         dest_y     = info->dest_y;
    __attribute__((unused)) int32_t         width      = info->width;
    __attribute__((unused)) int32_t         height     = info->height;
    uint32_t *dst_line;
    uint32_t *mask_line;
    uint32_t *src_first_line;
    int y1, y2;
    pixman_fixed_t max_vx = 2147483647;
    pixman_vector_t v;
    pixman_fixed_t vx, vy;
    pixman_fixed_t unit_x, unit_y;
    int32_t left_pad, left_tz, right_tz, right_pad;
    uint32_t *dst;
    uint32_t solid_mask;
    const uint32_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    int src_width;
    pixman_fixed_t src_width_fixed;
    int max_x;
    pixman_bool_t need_src_extension;

    do
    {
        uint32_t *__bits__;
        int __stride__;
        __bits__ = dest_image->bits.bits;
        __stride__ = dest_image->bits.rowstride;
        dst_stride = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        dst_line = (uint32_t *) __bits__ + dst_stride * dest_y + 1 * dest_x;
    } while (0);

    if ((0) & (1 << 1))
    {
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        mask_stride = 0;
    }
    else if ((0) & (1 << 2))
    {
        do
        {
            uint32_t *__bits__;
            int __stride__;
            __bits__ = mask_image->bits.bits;
            __stride__ = mask_image->bits.rowstride;
            mask_stride = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
            mask_line = (uint32_t *) __bits__ + mask_stride * mask_y + 1 * mask_x;
        } while (0);
    }

    do
    {
        uint32_t *__bits__;
        int __stride__;
        __bits__ = src_image->bits.bits;
        __stride__ = src_image->bits.rowstride;
        src_stride = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        src_first_line = (uint32_t *) __bits__ + src_stride * 0 + 1 * 0;
    } while (0);

    v.vector[0] = ((pixman_fixed_t) ((uint32_t) src_x << 16)) + ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) src_y << 16)) + ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    v.vector[2] = (pixman_fixed_t) ((uint32_t) 1 << 16);

    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;

    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    v.vector[1] -= ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    vy = v.vector[1];

    if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE)
    {
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                 &left_pad, &left_tz, &width,
                                                 &right_tz, &right_pad);
        if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD)
        {
            left_pad += left_tz;
            right_pad += right_tz;
            left_tz = right_tz = 0;
        }
        v.vector[0] += left_pad * unit_x;
    }

    if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL)
    {
        vx = v.vector[0];
        repeat (PIXMAN_REPEAT_NORMAL, &vx, (pixman_fixed_t) ((uint32_t) src_image->bits.width << 16));
        max_x = ((int) ((vx + (width - 1) * (int64_t) unit_x) >> 16)) + 1;
        if (src_image->bits.width < 64)
        {
            src_width = 0;
            while (src_width < 64 && src_width <= max_x)
                src_width += src_image->bits.width;
            need_src_extension = 1;
        }
        else
        {
            src_width = src_image->bits.width;
            need_src_extension = 0;
        }
        src_width_fixed = (pixman_fixed_t) ((uint32_t) src_width << 16);
    }

    while (--height >= 0)
    {
        int weight1, weight2;
        dst = dst_line;
        dst_line += dst_stride;
        vx = v.vector[0];
        if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; }
        y1 = (int) (vy >> 16);
        weight2 = pixman_fixed_to_bilinear_weight (vy);
        if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; }
        else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; }
        vy += unit_y;

        if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2];
            uint32_t buf2[2];
            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = src1[0];
                buf2[0] = buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
            }
        }
        else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2];
            uint32_t buf2[2];
            if (y1 < 0) { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0) { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (left_tz > 0)
            {
                buf1[0] = 0; buf1[1] = src1[0];
                buf2[0] = 0; buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1), unit_x, 0, 0);
                dst += left_tz;
                if ((0) & (1 << 2)) mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0)
            {
                buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1), unit_x, 0, 0);
                dst += right_tz;
                if ((0) & (1 << 2)) mask += right_tz;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
            }
        }
        else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL)
        {
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t *src_line_top;
            uint32_t *src_line_bottom;
            uint32_t buf1[2];
            uint32_t buf2[2];
            uint32_t extended_src_line0[64 * 2];
            uint32_t extended_src_line1[64 * 2];
            int i, j;
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;
            if (need_src_extension)
            {
                for (i = 0; i < src_width;)
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                    {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }
            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;
            while (width_remain > 0)
            {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if ((int) (vx >> 16) == src_width - 1)
                {
                    num_pixels = ((src_width_fixed - vx - (pixman_fixed_t) 1) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1), unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if ((int) (vx >> 16) != src_width - 1 && width_remain > 0)
                {
                    num_pixels = ((src_width_fixed - ((pixman_fixed_t) ((uint32_t) 1 << 16)) - vx - (pixman_fixed_t) 1) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                }
            }
        }
        else
        {
            scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
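In the PAD branch above, repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height) is expected to clamp the row index into the image, which matches how y1/y2 then index src_first_line. A sketch of that assumed behavior follows; repeat_pad is a hypothetical helper, not pixman's repeat.

/* Assumed PAD-repeat semantics: out-of-range coordinates pin to the
   nearest edge, so the edge pixel is replicated outside the image. */
static void
repeat_pad (int *coord, int size)
{
    if (*coord < 0)
        *coord = 0;
    else if (*coord >= size)
        *coord = size - 1;
}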
5732 scaled_bilinear_scanline_sse2_8888_8888_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 <<
1)) { solid_mask = _pixman_image_get_solid (imp, mask_image,
dest_image->bits.format); mask_stride = 0; } else if ((0)
& (1 << 2)) { do { uint32_t *__bits__; int __stride__
; __bits__ = mask_image->bits.bits; __stride__ = mask_image
->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof
(uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while
(0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) *
(0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t
) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t
) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits
.width, v.vector[0], unit_x, &left_pad, &left_tz, &
width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz
; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x;
} if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector
[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t)
((uint32_t) (src_image->bits.width) << 16))); max_x
= ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16))
+ 1; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride
; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight
(vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) -
weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7
) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD
) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]
; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height
); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height
); src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad
; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((0) & (1 << 2)) mask += width
; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image
->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image->
bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC (
dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0
); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t
*src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 <
0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if
(y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->
bits.height) { weight2 = 0; y2 = src_image->bits.height - 1
; } src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad
; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2
[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 <<
2)) mask += left_tz; vx += left_tz * unit_x; } if (width >
0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask,
src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst +=
width; if ((0) & (1 << 2)) mask += width; vx += width
* unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->
bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits
.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 <<
2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1
[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL
) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top
; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2
[2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1
[64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image
->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image
->bits.height); src_line_top = src_first_line + src_stride
* y1; src_line_bottom = src_first_line + src_stride * y2; if
(need_src_extension) { for (i=0; i<src_width;) { for (j=0
; j<src_image->bits.width; j++, i++) { extended_src_line0
[i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom
[j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom
= &extended_src_line1[0]; } buf1[0] = src_line_top[src_width
- 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width
- 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while
(width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx
, src_width_fixed); if (((int) ((vx) >> 16)) == src_width
- 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t
) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels
= width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (
dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((0) &
(1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL
, &vx, src_width_fixed); } if (((int) ((vx) >> 16))
!= src_width - 1 && width_remain > 0) { num_pixels
= ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) <<
16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels
> width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, src_line_top, src_line_bottom, num_pixels, weight1
, weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((0) &
(1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, src_first_line + src_stride * y1, src_first_line
+ src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx
, 0); } } }
5733 uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 <<
1)) { solid_mask = _pixman_image_get_solid (imp, mask_image,
dest_image->bits.format); mask_stride = 0; } else if ((0)
& (1 << 2)) { do { uint32_t *__bits__; int __stride__
; __bits__ = mask_image->bits.bits; __stride__ = mask_image
->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof
(uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while
(0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) *
(0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t
) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t
) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits
.width, v.vector[0], unit_x, &left_pad, &left_tz, &
width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz
; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x;
} if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector
[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t)
((uint32_t) (src_image->bits.width) << 16))); max_x
= ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16))
+ 1; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride
; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight
(vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) -
weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7
) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD
) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]
; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height
); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height
); src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad
; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((0) & (1 << 2)) mask += width
; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image
->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image->
bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC (
dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0
); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t
*src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 <
0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if
(y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->
bits.height) { weight2 = 0; y2 = src_image->bits.height - 1
; } src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad
; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2
[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 <<
2)) mask += left_tz; vx += left_tz * unit_x; } if (width >
0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask,
src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst +=
width; if ((0) & (1 << 2)) mask += width; vx += width
* unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->
bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits
.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 <<
2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1
[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL
) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top
; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2
[2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1
[64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image
->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image
->bits.height); src_line_top = src_first_line + src_stride
* y1; src_line_bottom = src_first_line + src_stride * y2; if
(need_src_extension) { for (i=0; i<src_width;) { for (j=0
; j<src_image->bits.width; j++, i++) { extended_src_line0
[i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom
[j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom
= &extended_src_line1[0]; } buf1[0] = src_line_top[src_width
- 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width
- 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while
(width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx
, src_width_fixed); if (((int) ((vx) >> 16)) == src_width
- 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t
) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels
= width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (
dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((0) &
(1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL
, &vx, src_width_fixed); } if (((int) ((vx) >> 16))
!= src_width - 1 && width_remain > 0) { num_pixels
= ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) <<
16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels
> width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, src_line_top, src_line_bottom, num_pixels, weight1
, weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((0) &
(1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, src_first_line + src_stride * y1, src_first_line
+ src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx
, 0); } } }
5734 NONE, FLAG_NONE)
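The expansion above is rendered as one flowed line, which makes the control flow hard to follow. Below is a condensed, hand-written C sketch (not pixman source) of the PIXMAN_REPEAT_NONE scanline decomposition it implements: each destination scanline is split into left_pad / left_tz / width / right_tz / right_pad segments, the pad segments are fed zero-filled two-pixel buffers, and the transition (tz) segments are fed buffers mixing an edge pixel with zero. The repeated "if ((0) & (1 << 2)) mask += ..." statements in the expansion come from FLAG_NONE: the flags literal is 0, so the mask-advancing code is compiled out. The scanline stub and the fixed-point helpers below are placeholders standing in for scaled_bilinear_scanline_sse2_8888_8888_SRC and pixman's own macros (the real scanline also takes mask and max_vx arguments, omitted here).

/*
 * Condensed sketch of the REPEAT_NONE scanline decomposition performed
 * by the expansion above.  Illustrative only, not pixman source.
 */
#include <stdint.h>

typedef int32_t pixman_fixed_t;
#define pixman_fixed_1        ((pixman_fixed_t) (1 << 16))
#define pixman_fixed_frac(f)  ((f) & (pixman_fixed_1 - 1))

/* Stand-in for scaled_bilinear_scanline_sse2_8888_8888_SRC. */
static void scanline (uint32_t *dst, const uint32_t *top, const uint32_t *bot,
                      int32_t w, int wt, int wb,
                      pixman_fixed_t vx, pixman_fixed_t unit_x, int zero_src)
{
    (void) dst; (void) top; (void) bot; (void) w;
    (void) wt; (void) wb; (void) vx; (void) unit_x; (void) zero_src;
}

/* One destination scanline under PIXMAN_REPEAT_NONE: five segments. */
static void
repeat_none_scanline (uint32_t *dst, uint32_t *src1, uint32_t *src2,
                      int src_w, int32_t left_pad, int32_t left_tz,
                      int32_t width, int32_t right_tz, int32_t right_pad,
                      int wt, int wb, pixman_fixed_t vx, pixman_fixed_t unit_x)
{
    uint32_t buf1[2], buf2[2];

    if (left_pad > 0)            /* fully outside: transparent black */
    {
        buf1[0] = buf1[1] = 0;
        buf2[0] = buf2[1] = 0;
        scanline (dst, buf1, buf2, left_pad, wt, wb, 0, 0, 1);
        dst += left_pad;
    }
    if (left_tz > 0)             /* transition: blend zero with first pixel */
    {
        buf1[0] = 0; buf1[1] = src1[0];
        buf2[0] = 0; buf2[1] = src2[0];
        scanline (dst, buf1, buf2, left_tz, wt, wb,
                  pixman_fixed_frac (vx), unit_x, 0);
        dst += left_tz;
        vx += left_tz * unit_x;
    }
    if (width > 0)               /* interior: read the real scanlines */
    {
        scanline (dst, src1, src2, width, wt, wb, vx, unit_x, 0);
        dst += width;
        vx += width * unit_x;
    }
    if (right_tz > 0)            /* transition: blend last pixel with zero */
    {
        buf1[0] = src1[src_w - 1]; buf1[1] = 0;
        buf2[0] = src2[src_w - 1]; buf2[1] = 0;
        scanline (dst, buf1, buf2, right_tz, wt, wb,
                  pixman_fixed_frac (vx), unit_x, 0);
        dst += right_tz;
    }
    if (right_pad > 0)           /* fully outside again */
    {
        buf1[0] = buf1[1] = 0;
        buf2[0] = buf2[1] = 0;
        scanline (dst, buf1, buf2, right_pad, wt, wb, 0, 0, 1);
    }
}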
5735 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 <<
1)) { solid_mask = _pixman_image_get_solid (imp, mask_image,
dest_image->bits.format); mask_stride = 0; } else if ((0)
& (1 << 2)) { do { uint32_t *__bits__; int __stride__
; __bits__ = mask_image->bits.bits; __stride__ = mask_image
->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof
(uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while
(0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) *
(0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t
) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t
) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE
) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits
.width, v.vector[0], unit_x, &left_pad, &left_tz, &
width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz
; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x;
} if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v
.vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16))); max_x
= ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16))
+ 1; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride
; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight
(vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) -
weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7
) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD
) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]
; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height
); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height
); src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad
; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((0) & (1 << 2)) mask += width
; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image
->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image->
bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC (
dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0
); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) {
uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if
(y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->
bits.height) { weight1 = 0; y1 = src_image->bits.height - 1
; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image
->bits.height) { weight2 = 0; y2 = src_image->bits.height
- 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad
; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2
[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 <<
2)) mask += left_tz; vx += left_tz * unit_x; } if (width >
0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask,
src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst +=
width; if ((0) & (1 << 2)) mask += width; vx += width
* unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->
bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits
.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 <<
2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1
[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL
) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top
; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2
[2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1
[64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image
->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image
->bits.height); src_line_top = src_first_line + src_stride
* y1; src_line_bottom = src_first_line + src_stride * y2; if
(need_src_extension) { for (i=0; i<src_width;) { for (j=0
; j<src_image->bits.width; j++, i++) { extended_src_line0
[i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom
[j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom
= &extended_src_line1[0]; } buf1[0] = src_line_top[src_width
- 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width
- 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while
(width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx
, src_width_fixed); if (((int) ((vx) >> 16)) == src_width
- 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t
) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels
= width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (
dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((0) &
(1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL
, &vx, src_width_fixed); } if (((int) ((vx) >> 16))
!= src_width - 1 && width_remain > 0) { num_pixels
= ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) <<
16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels
> width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, src_line_top, src_line_bottom, num_pixels, weight1
, weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((0) &
(1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC
(dst, mask, src_first_line + src_stride * y1, src_first_line
+ src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx
, 0); } } }
5736 scaled_bilinear_scanline_sse2_8888_8888_SRC,
5737 uint32_t, uint32_t, uint32_t,
5738 NORMAL, FLAG_NONE)
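The PIXMAN_REPEAT_NORMAL branch of the same expansion deserves the same treatment. Here is a sketch continuing the previous one (same scanline stub and fixed-point helpers), simplifying pixman's repeat() to a plain wrap for non-negative vx: once the integer part of vx reaches the seam column src_width - 1, the right interpolation neighbour wraps to pixel 0, so the code switches to a two-pixel buffer holding { last, first } until vx wraps; otherwise it runs directly on the (possibly extended) source line until the seam is reached again.

/*
 * Sketch of the REPEAT_NORMAL inner loop from the expansion above.
 * Illustrative only; reuses scanline(), pixman_fixed_1 and
 * pixman_fixed_frac() from the previous sketch.
 */
static void
repeat_normal_scanline (uint32_t *dst, uint32_t *line_top, uint32_t *line_bot,
                        int src_width, pixman_fixed_t src_width_fixed,
                        int32_t width, int wt, int wb,
                        pixman_fixed_t vx, pixman_fixed_t unit_x)
{
    uint32_t buf1[2] = { line_top[src_width - 1], line_top[0] };
    uint32_t buf2[2] = { line_bot[src_width - 1], line_bot[0] };
    int32_t width_remain = width;

    while (width_remain > 0)
    {
        while (vx >= src_width_fixed)   /* wrap vx into [0, src_width) */
            vx -= src_width_fixed;

        if ((vx >> 16) == src_width - 1)
        {
            /* Seam: interpolate from the { last, first } buffers until
             * vx leaves the last column. */
            int32_t n = (src_width_fixed - vx - 1) / unit_x + 1;
            if (n > width_remain)
                n = width_remain;
            scanline (dst, buf1, buf2, n, wt, wb,
                      pixman_fixed_frac (vx), unit_x, 0);
            width_remain -= n;
            vx += n * unit_x;
            dst += n;
            while (vx >= src_width_fixed)
                vx -= src_width_fixed;
        }
        if ((vx >> 16) != src_width - 1 && width_remain > 0)
        {
            /* Interior: run on the real line until the seam returns. */
            int32_t n = (src_width_fixed - pixman_fixed_1 - vx - 1)
                        / unit_x + 1;
            if (n > width_remain)
                n = width_remain;
            scanline (dst, line_top, line_bot, n, wt, wb, vx, unit_x, 0);
            width_remain -= n;
            vx += n * unit_x;
            dst += n;
        }
    }
}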
5739
5740 static force_inline__inline__ __attribute__ ((__always_inline__)) void
5741 scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst,
5742 const uint32_t * mask,
5743 const uint32_t * src_top,
5744 const uint32_t * src_bottom,
5745 int32_t w,
5746 int wt,
5747 int wb,
5748 pixman_fixed_t vx_,
5749 pixman_fixed_t unit_x_,
5750 pixman_fixed_t max_vx,
5751 pixman_bool_t zero_src)
5752 {
5753 intptr_t vx = vx_;
5754 intptr_t unit_x = unit_x_;
5755 BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt,
wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb
, wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0,
1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 (
unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x
); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x
* 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x
* 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128
(); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx +
1), vx, -(vx + 1), vx, -(vx + 1))
;
5756 uint32_t pix1, pix2;
5757
5758 while (w && ((uintptr_t)dst & 15))
5759 {
5760 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i
tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16
]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix
, xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1
= _mm_cvtsi128_si32 (xmm_pix); } while(0)
;
5761 *dst++ = pix1 | 0xFF000000;
5762 w--;
5763 }
5764
5765 while ((w -= 4) >= 0) {
5766 __m128i xmm_src;
5767 BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); do { __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; do { __m128i
xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i
*)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 *
2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr
= _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
__m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx
>> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b
; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >>
16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix3 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b
; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >>
16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix1 = _mm_packs_epi32 (xmm_pix1
, xmm_pix2); xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4);
xmm_src = _mm_packus_epi16 (xmm_pix1, xmm_pix3); } while(0)
;
5768 _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000));
5769 dst += 4;
5770 }
5771
5772 if (w & 2)
5773 {
5774 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i
tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16
]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix
, xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1
= _mm_cvtsi128_si32 (xmm_pix); } while(0)
;
5775 BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i
tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16
]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix
, xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix2
= _mm_cvtsi128_si32 (xmm_pix); } while(0)
;
5776 *dst++ = pix1 | 0xFF000000;
5777 *dst++ = pix2 | 0xFF000000;
5778 }
5779
5780 if (w & 1)
5781 {
5782 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i
tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16
]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix
, xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1
= _mm_cvtsi128_si32 (xmm_pix); } while(0)
;
5783 *dst = pix1 | 0xFF000000;
5784 }
5785 }
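For reference, here is a scalar model of what one BILINEAR_INTERPOLATE_ONE_PIXEL expansion above computes (illustrative, not pixman source; the SSE2 version processes all four channels of a pixel pair at once with _mm_madd_epi16). Pixman uses 7 weight bits, visible in the expansion as the 16 - 7 shift and the final 7 * 2 shift, so the vertical weights satisfy wt + wb == 128 and the weight products are normalised by >> 14. The SSE2 rounding via the xmm_addc constant differs slightly from this model. The x888 variants then force the alpha byte with | 0xFF000000, as in the stores above.

/*
 * Scalar model of one bilinear interpolation step, per channel:
 * out = ((tl*wl + tr*wr) * wt + (bl*wl + br*wr) * wb) >> 14,
 * with wl + wr == wt + wb == 128 (7 weight bits each).
 */
#include <stdint.h>

static uint32_t
bilinear_one_pixel (const uint32_t *src_top, const uint32_t *src_bottom,
                    int32_t vx, int wt, int wb)
{
    uint32_t tl = src_top[vx >> 16],    tr = src_top[(vx >> 16) + 1];
    uint32_t bl = src_bottom[vx >> 16], br = src_bottom[(vx >> 16) + 1];
    int wr = (vx >> (16 - 7)) & 0x7f;   /* right weight, 0..127 */
    int wl = 128 - wr;                  /* left weight */
    uint32_t out = 0;
    int c;

    for (c = 0; c < 32; c += 8)         /* four 8-bit channels */
    {
        uint32_t t = ((tl >> c) & 0xff) * wl + ((tr >> c) & 0xff) * wr;
        uint32_t b = ((bl >> c) & 0xff) * wl + ((br >> c) & 0xff) * wr;
        uint32_t v = (t * wt + b * wb) >> 14;   /* 2 * 7 weight bits */
        out |= (v & 0xff) << c;
    }
    return out;
}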
5786
5787 FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC,static void fast_composite_scaled_bilinear_sse2_x888_8888_cover_SRC
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 <<
1)) { solid_mask = _pixman_image_get_solid (imp, mask_image,
dest_image->bits.format); mask_stride = 0; } else if ((0)
& (1 << 2)) { do { uint32_t *__bits__; int __stride__
; __bits__ = mask_image->bits.bits; __stride__ = mask_image
->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof
(uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while
(0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) *
(0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t
) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t
) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD
|| -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds
(src_image->bits.width, v.vector[0], unit_x, &left_pad
, &left_tz, &width, &right_tz, &right_pad); if
(-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad +=
right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad *
unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0]
; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((
uint32_t) (src_image->bits.width) << 16))); max_x = (
(int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1
; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride
; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight
(vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) -
weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7
) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t
*src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD
, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD
, &y2, src_image->bits.height); src1 = src_first_line +
src_stride * y1; src2 = src_first_line + src_stride * y2; if
(left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2
[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst
, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst
+= left_pad; if ((0) & (1 << 2)) mask += left_pad;
} if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((0) & (1 << 2)) mask += width
; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image
->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image->
bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC (
dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0
); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, *
src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1
= 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1
= 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2
= 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2
= 0; y2 = src_image->bits.height - 1; } src1 = src_first_line
+ src_stride * y1; src2 = src_first_line + src_stride * y2; if
(left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1]
= 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask,
buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad
; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz
> 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[
1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst
, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((
((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 <<
2)) mask += left_tz; vx += left_tz * unit_x; } if (width >
0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask,
src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst +=
width; if ((0) & (1 << 2)) mask += width; vx += width
* unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->
bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits
.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC
(dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 <<
2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1
[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels
; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom
; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0
[64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat (
PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat
(PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top
= src_first_line + src_stride * y1; src_line_bottom = src_first_line
+ src_stride * y2; if (need_src_extension) { for (i=0; i<
src_width;) { for (j=0; j<src_image->bits.width; j++, i
++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1
[i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0
[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] =
src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2
[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom
[0]; width_remain = width; while (width_remain > 0) { repeat
(PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int)
((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed
- vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels >
width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC
(dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((0) &
(1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL
, &vx, src_width_fixed); } if (((int) ((vx) >> 16))
!= src_width - 1 && width_remain > 0) { num_pixels
= ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) <<
16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels
> width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC
(dst, mask, src_line_top, src_line_bottom, num_pixels, weight1
, weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((0) &
(1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC
(dst, mask, src_first_line + src_stride * y1, src_first_line
+ src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx
, 0); } } }
5788                                scaled_bilinear_scanline_sse2_x888_8888_SRC,
5789                                uint32_t, uint32_t, uint32_t,
5790                                COVER, FLAG_NONE)
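Both instantiations renormalize coordinates with repeat () before sampling (y1/y2 per scanline, and vx inside the NORMAL-repeat inner loop). As a rough model of those semantics under pixman's usual repeat rules, here is a simplified sketch; the names are hypothetical stand-ins and REFLECT handling is omitted:

    #include <stdint.h>

    /* Hypothetical names; a rough model of repeat () for the modes used here.
     * coord is 16.16 fixed point when wrapping vx, or an integer scanline
     * index when wrapping y1/y2; the logic is the same either way. */
    typedef enum { SKR_NONE, SKR_NORMAL, SKR_PAD } skr_mode_t;

    static int
    sketch_repeat (skr_mode_t mode, int32_t *coord, int32_t size)
    {
        switch (mode)
        {
        case SKR_NORMAL:                /* wrap into [0, size), staying non-negative */
            *coord %= size;
            if (*coord < 0)
                *coord += size;
            return 1;
        case SKR_PAD:                   /* clamp to the nearest edge */
            if (*coord < 0)
                *coord = 0;
            if (*coord >= size)
                *coord = size - 1;
            return 1;
        default:                        /* SKR_NONE: just report out-of-bounds */
            return *coord >= 0 && *coord < size;
        }
    }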
5791 FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC,
     /* macro expansion: */
     static void
     fast_composite_scaled_bilinear_sse2_x888_8888_pad_SRC (pixman_implementation_t *imp,
                                                            pixman_composite_info_t *info)
     {
         __attribute__((unused)) pixman_op_t op = info->op;
         __attribute__((unused)) pixman_image_t *src_image = info->src_image;
         __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
         __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
         __attribute__((unused)) int32_t src_x = info->src_x;
         __attribute__((unused)) int32_t src_y = info->src_y;
         __attribute__((unused)) int32_t mask_x = info->mask_x;
         __attribute__((unused)) int32_t mask_y = info->mask_y;
         __attribute__((unused)) int32_t dest_x = info->dest_x;
         __attribute__((unused)) int32_t dest_y = info->dest_y;
         __attribute__((unused)) int32_t width = info->width;
         __attribute__((unused)) int32_t height = info->height;
         uint32_t *dst_line;
         uint32_t *mask_line;
         uint32_t *src_first_line;
         int y1, y2;
         pixman_fixed_t max_vx = (2147483647);
         pixman_vector_t v;
         pixman_fixed_t vx, vy;
         pixman_fixed_t unit_x, unit_y;
         int32_t left_pad, left_tz, right_tz, right_pad;
         uint32_t *dst;
         uint32_t solid_mask;
         const uint32_t *mask = &solid_mask;
         int src_stride, mask_stride, dst_stride;
         int src_width;
         pixman_fixed_t src_width_fixed;
         int max_x;
         pixman_bool_t need_src_extension;

         /* locate the first destination scanline */
         do {
             uint32_t *__bits__;
             int __stride__;
             __bits__ = dest_image->bits.bits;
             __stride__ = dest_image->bits.rowstride;
             (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
             (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
         } while (0);

         /* FLAG_NONE makes both flag tests constant zero: the mask setup is dead code */
         if ((0) & (1 << 1))
         {
             solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
             mask_stride = 0;
         }
         else if ((0) & (1 << 2))
         {
             do {
                 uint32_t *__bits__;
                 int __stride__;
                 __bits__ = mask_image->bits.bits;
                 __stride__ = mask_image->bits.rowstride;
                 (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
                 (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
             } while (0);
         }

         /* locate the first source scanline */
         do {
             uint32_t *__bits__;
             int __stride__;
             __bits__ = src_image->bits.bits;
             __stride__ = src_image->bits.rowstride;
             (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
             (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
         } while (0);

         /* transform the center of the first destination pixel into source space */
         v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));
         if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
             return;

         unit_x = src_image->common.transform->matrix[0][0];
         unit_y = src_image->common.transform->matrix[1][1];
         v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[1] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         vy = v.vector[1];

         /* this instantiation substitutes PIXMAN_REPEAT_PAD, so the first
          * comparison is tautologically true and the bounds are computed */
         if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE)
         {
             bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                      &left_pad, &left_tz, &width, &right_tz, &right_pad);
             if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD)
             {
                 left_pad += left_tz;
                 right_pad += right_tz;
                 left_tz = right_tz = 0;
             }
             v.vector[0] += left_pad * unit_x;
         }
         if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
         {
             vx = v.vector[0];
             repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16)));
             max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1;
             if (src_image->bits.width < 64)
             {
                 src_width = 0;
                 while (src_width < 64 && src_width <= max_x)
                     src_width += src_image->bits.width;
                 need_src_extension = 1;
             }
             else
             {
                 src_width = src_image->bits.width;
                 need_src_extension = 0;
             }
             src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width) << 16));
         }

         while (--height >= 0)
         {
             int weight1, weight2;

             dst = dst_line;
             dst_line += dst_stride;
             vx = v.vector[0];
             if ((0) & (1 << 2))
             {
                 mask = mask_line;
                 mask_line += mask_stride;
             }

             /* split the vertical position into scanline pair and 7-bit weights */
             y1 = ((int) ((vy) >> 16));
             weight2 = pixman_fixed_to_bilinear_weight (vy);
             if (weight2)
             {
                 y2 = y1 + 1;
                 weight1 = (1 << 7) - weight2;
             }
             else
             {
                 y2 = y1;
                 weight1 = weight2 = (1 << 7) / 2;
             }
             vy += unit_y;

             /* for PAD this first branch is the live one; the rest fold away */
             if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD)
             {
                 uint32_t *src1, *src2;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
                 repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
                 src1 = src_first_line + src_stride * y1;
                 src2 = src_first_line + src_stride * y2;
                 if (left_pad > 0)
                 {
                     buf1[0] = buf1[1] = src1[0];
                     buf2[0] = buf2[1] = src2[0];
                     scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                     dst += left_pad;
                     if ((0) & (1 << 2)) mask += left_pad;
                 }
                 if (width > 0)
                 {
                     scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                     dst += width;
                     if ((0) & (1 << 2)) mask += width;
                 }
                 if (right_pad > 0)
                 {
                     buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                     buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                     scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
                 }
             }
             else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE)
             {
                 uint32_t *src1, *src2;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 if (y1 < 0) { weight1 = 0; y1 = 0; }
                 if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
                 if (y2 < 0) { weight2 = 0; y2 = 0; }
                 if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
                 src1 = src_first_line + src_stride * y1;
                 src2 = src_first_line + src_stride * y2;
                 if (left_pad > 0)
                 {
                     buf1[0] = buf1[1] = 0;
                     buf2[0] = buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                     dst += left_pad;
                     if ((0) & (1 << 2)) mask += left_pad;
                 }
                 if (left_tz > 0)
                 {
                     buf1[0] = 0; buf1[1] = src1[0];
                     buf2[0] = 0; buf2[1] = src2[0];
                     scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2,
                         ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                     dst += left_tz;
                     if ((0) & (1 << 2)) mask += left_tz;
                     vx += left_tz * unit_x;
                 }
                 if (width > 0)
                 {
                     scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                     dst += width;
                     if ((0) & (1 << 2)) mask += width;
                     vx += width * unit_x;
                 }
                 if (right_tz > 0)
                 {
                     buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
                     buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2,
                         ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                     dst += right_tz;
                     if ((0) & (1 << 2)) mask += right_tz;
                 }
                 if (right_pad > 0)
                 {
                     buf1[0] = buf1[1] = 0;
                     buf2[0] = buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
                 }
             }
             else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
             {
                 int32_t num_pixels;
                 int32_t width_remain;
                 uint32_t *src_line_top;
                 uint32_t *src_line_bottom;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 uint32_t extended_src_line0[64*2];
                 uint32_t extended_src_line1[64*2];
                 int i, j;
                 repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
                 repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
                 src_line_top = src_first_line + src_stride * y1;
                 src_line_bottom = src_first_line + src_stride * y2;
                 if (need_src_extension)
                 {
                     /* replicate a narrow source line into the wider local buffers */
                     for (i = 0; i < src_width;)
                     {
                         for (j = 0; j < src_image->bits.width; j++, i++)
                         {
                             extended_src_line0[i] = src_line_top[j];
                             extended_src_line1[i] = src_line_bottom[j];
                         }
                     }
                     src_line_top = &extended_src_line0[0];
                     src_line_bottom = &extended_src_line1[0];
                 }
                 buf1[0] = src_line_top[src_width - 1];
                 buf1[1] = src_line_top[0];
                 buf2[0] = src_line_bottom[src_width - 1];
                 buf2[1] = src_line_bottom[0];
                 width_remain = width;
                 while (width_remain > 0)
                 {
                     repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                     if (((int) ((vx) >> 16)) == src_width - 1)
                     {
                         /* samples straddling the wrap-around seam use buf1/buf2 */
                         num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                         if (num_pixels > width_remain)
                             num_pixels = width_remain;
                         scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2,
                             ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, src_width_fixed, 0);
                         width_remain -= num_pixels;
                         vx += num_pixels * unit_x;
                         dst += num_pixels;
                         if ((0) & (1 << 2)) mask += num_pixels;
                         repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                     }
                     if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0)
                     {
                         num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                         if (num_pixels > width_remain)
                             num_pixels = width_remain;
                         scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels,
                             weight1, weight2, vx, unit_x, src_width_fixed, 0);
                         width_remain -= num_pixels;
                         vx += num_pixels * unit_x;
                         dst += num_pixels;
                         if ((0) & (1 << 2)) mask += num_pixels;
                     }
                 }
             }
             else
             {
                 /* unreachable for PAD; kept from the generic template */
                 scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask,
                     src_first_line + src_stride * y1,
                     src_first_line + src_stride * y2,
                     width, weight1, weight2, vx, unit_x, max_vx, 0);
             }
         }
     }
5792                                scaled_bilinear_scanline_sse2_x888_8888_SRC,
5793                               uint32_t, uint32_t, uint32_t,
5794                               PAD, FLAG_NONE)
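The pad instantiation above clamps the vertical source coordinates y1/y2 through pixman's repeat() helper before any row pointer is formed, and handles horizontal overflow by replicating the edge pixels into the two-entry buf1/buf2 buffers. As a minimal sketch of the three repeat modes these fast paths compare against (an illustration only, not pixman's actual helper, which takes a pixman_repeat_t and updates its coordinate argument in place):

    #include <stdint.h>

    /* Illustrative sketch: approximate behaviour of the repeat modes used
     * by the fast paths above. The enum names mirror pixman's, but this is
     * not the library's implementation. */
    typedef enum { REPEAT_NONE, REPEAT_NORMAL, REPEAT_PAD } repeat_mode_t;

    static int
    repeat_coord (repeat_mode_t mode, int coord, int size)
    {
        switch (mode)
        {
        case REPEAT_PAD:             /* clamp into [0, size - 1] */
            if (coord < 0)     return 0;
            if (coord >= size) return size - 1;
            return coord;
        case REPEAT_NORMAL:          /* wrap modulo size (tile the source) */
            coord %= size;
            return coord < 0 ? coord + size : coord;
        case REPEAT_NONE:            /* out-of-bounds samples read as zero; */
        default:                     /* the callers zero their buffers instead */
            return coord;
        }
    }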
5795 FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC,
static void
fast_composite_scaled_bilinear_sse2_x888_8888_normal_SRC (pixman_implementation_t *imp,
                                                          pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t op = info->op;
    __attribute__((unused)) pixman_image_t * src_image = info->src_image;
    __attribute__((unused)) pixman_image_t * mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t * dest_image = info->dest_image;
    __attribute__((unused)) int32_t src_x = info->src_x;
    __attribute__((unused)) int32_t src_y = info->src_y;
    __attribute__((unused)) int32_t mask_x = info->mask_x;
    __attribute__((unused)) int32_t mask_y = info->mask_y;
    __attribute__((unused)) int32_t dest_x = info->dest_x;
    __attribute__((unused)) int32_t dest_y = info->dest_y;
    __attribute__((unused)) int32_t width = info->width;
    __attribute__((unused)) int32_t height = info->height;
    uint32_t *dst_line;
    uint32_t *mask_line;
    uint32_t *src_first_line;
    int y1, y2;
    pixman_fixed_t max_vx = (2147483647);
    pixman_vector_t v;
    pixman_fixed_t vx, vy;
    pixman_fixed_t unit_x, unit_y;
    int32_t left_pad, left_tz, right_tz, right_pad;
    uint32_t *dst;
    uint32_t solid_mask;
    const uint32_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    int src_width;
    pixman_fixed_t src_width_fixed;
    int max_x;
    pixman_bool_t need_src_extension;

    do {
        uint32_t *__bits__; int __stride__;
        __bits__ = dest_image->bits.bits;
        __stride__ = dest_image->bits.rowstride;
        (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
    } while (0);

    if ((0) & (1 << 1)) {
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        mask_stride = 0;
    } else if ((0) & (1 << 2)) {
        do {
            uint32_t *__bits__; int __stride__;
            __bits__ = mask_image->bits.bits;
            __stride__ = mask_image->bits.rowstride;
            (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
            (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
        } while (0);
    }

    do {
        uint32_t *__bits__; int __stride__;
        __bits__ = src_image->bits.bits;
        __stride__ = src_image->bits.rowstride;
        (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
    } while (0);

    v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));

    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;

    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    vy = v.vector[1];

    if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE)
    {
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                 &left_pad, &left_tz, &width, &right_tz, &right_pad);
        if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD)
        {
            left_pad += left_tz;
            right_pad += right_tz;
            left_tz = right_tz = 0;
        }
        v.vector[0] += left_pad * unit_x;
    }

    if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL)
    {
        vx = v.vector[0];
        repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16)));
        max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1;
        if (src_image->bits.width < 64)
        {
            src_width = 0;
            while (src_width < 64 && src_width <= max_x)
                src_width += src_image->bits.width;
            need_src_extension = 1;
        }
        else
        {
            src_width = src_image->bits.width;
            need_src_extension = 0;
        }
        src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width) << 16));
    }

    while (--height >= 0)
    {
        int weight1, weight2;
        dst = dst_line;
        dst_line += dst_stride;
        vx = v.vector[0];
        if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; }
        y1 = ((int) ((vy) >> 16));
        weight2 = pixman_fixed_to_bilinear_weight (vy);
        if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; }
        else         { y2 = y1;     weight1 = weight2 = (1 << 7) / 2; }
        vy += unit_y;

        if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2];
            uint32_t buf2[2];
            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = src1[0];
                buf2[0] = buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
            }
        }
        else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2];
            uint32_t buf2[2];
            if (y1 < 0)                           { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height)     { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0)                           { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height)     { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (left_tz > 0)
            {
                buf1[0] = 0; buf1[1] = src1[0];
                buf2[0] = 0; buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2,
                    ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                dst += left_tz;
                if ((0) & (1 << 2)) mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0)
            {
                buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0;
                scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2,
                    ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                dst += right_tz;
                if ((0) & (1 << 2)) mask += right_tz;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
            }
        }
        else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL)
        {
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t * src_line_top;
            uint32_t * src_line_bottom;
            uint32_t buf1[2];
            uint32_t buf2[2];
            uint32_t extended_src_line0[64*2];
            uint32_t extended_src_line1[64*2];
            int i, j;
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;
            if (need_src_extension)
            {
                for (i = 0; i < src_width;)
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                    {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }
            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;
            while (width_remain > 0)
            {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if (((int) ((vx) >> 16)) == src_width - 1)
                {
                    num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2,
                        ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0)
                {
                    num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels,
                        weight1, weight2, vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                }
            }
        }
        else
        {
            scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1,
                src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
5796                               scaled_bilinear_scanline_sse2_x888_8888_SRC,
5797                               uint32_t, uint32_t, uint32_t,
5798                               NORMAL, FLAG_NONE)
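The NORMAL instantiation above copes with very narrow sources by tiling one scanline into a 64*2-entry stack buffer, so the SSE2 scanline can walk a linearly addressable line and only wrap vx between chunks. A compacted sketch of that extension step (the helper name is illustrative; in the expansion above this logic is inlined, using need_src_extension and extended_src_line0/1):

    #include <stdint.h>

    /* Tile 'line' into 'out' until it is at least 64 pixels wide, mirroring
     * the need_src_extension setup above. Returns the extended width: the
     * smallest multiple of 'width' that reaches 64 pixels (capped by max_x),
     * which by construction fits in the 64*2 buffer. */
    static int
    extend_narrow_line (const uint32_t *line, int width, int max_x, uint32_t out[64 * 2])
    {
        int src_width = 0;
        while (src_width < 64 && src_width <= max_x)
            src_width += width;

        for (int i = 0; i < src_width; )
            for (int j = 0; j < width; j++, i++)
                out[i] = line[j];

        return src_width;
    }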
5799
5800 static force_inline __inline__ __attribute__ ((__always_inline__)) void
5801scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
5802 const uint32_t * mask,
5803 const uint32_t * src_top,
5804 const uint32_t * src_bottom,
5805 int32_t w,
5806 int wt,
5807 int wb,
5808 pixman_fixed_t vx_,
5809 pixman_fixed_t unit_x_,
5810 pixman_fixed_t max_vx,
5811 pixman_bool_t zero_src)
5812{
5813 intptr_t vx = vx_;
5814 intptr_t unit_x = unit_x_;
5815    BILINEAR_DECLARE_VARIABLES
    const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);
    const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);
    const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1);
    const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x,
                                           unit_x, -unit_x, unit_x, -unit_x);
    const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4,
                                           unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4);
    const __m128i xmm_zero = _mm_setzero_si128 ();
    __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1),
                                   vx, -(vx + 1), vx, -(vx + 1));
5816 uint32_t pix1, pix2;
5817
5818 while (w && ((uintptr_t)dst & 15))
5819 {
5820        BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
        do {
            __m128i xmm_pix;
            do {
                __m128i xmm_wh, xmm_a, xmm_b;
                __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
                __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);
                (void)xmm_ux4;
                vx += unit_x;
                xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);
                xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);
                xmm_a = _mm_add_epi16 (xmm_a, xmm_b);
                xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7));
                xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);
                xmm_b = _mm_unpacklo_epi64 (xmm_b, xmm_a);
                xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh);
                xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2);
            } while (0);
            xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix);
            xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix);
            pix1 = _mm_cvtsi128_si32 (xmm_pix);
        } while (0);
5821
5822 if (pix1)
5823 {
5824 pix2 = *dst;
5825 *dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
5826 }
5827
5828 w--;
5829 dst++;
5830 }
5831
5832 while (w >= 4)
5833 {
5834 __m128i xmm_src;
5835 __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo;
5836 __m128i xmm_alpha_hi, xmm_alpha_lo;
5837
5838        BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
        do {
            __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4;
            do {
                __m128i xmm_wh, xmm_a, xmm_b;
                __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
                __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);
                (void)xmm_ux4;
                vx += unit_x;
                xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);
                xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);
                xmm_a = _mm_add_epi16 (xmm_a, xmm_b);
                xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7));
                xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);
                xmm_b = _mm_unpacklo_epi64 (xmm_b, xmm_a);
                xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh);
                xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 * 2);
            } while (0);
            do {
                __m128i xmm_wh, xmm_a, xmm_b;
                __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
                __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);
                (void)xmm_ux4;
                vx += unit_x;
                xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);
                xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);
                xmm_a = _mm_add_epi16 (xmm_a, xmm_b);
                xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7));
                xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);
                xmm_b = _mm_unpacklo_epi64 (xmm_b, xmm_a);
                xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh);
                xmm_pix2 = _mm_srli_epi32 (xmm_a, 7 * 2);
            } while (0);
            do {
                __m128i xmm_wh, xmm_a, xmm_b;
                __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
                __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);
                (void)xmm_ux4;
                vx += unit_x;
                xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);
                xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);
                xmm_a = _mm_add_epi16 (xmm_a, xmm_b);
                xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7));
                xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);
                xmm_b = _mm_unpacklo_epi64 (xmm_b, xmm_a);
                xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh);
                xmm_pix3 = _mm_srli_epi32 (xmm_a, 7 * 2);
            } while (0);
            do {
                __m128i xmm_wh, xmm_a, xmm_b;
                __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
                __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);
                (void)xmm_ux4;
                vx += unit_x;
                xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);
                xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);
                xmm_a = _mm_add_epi16 (xmm_a, xmm_b);
                xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7));
                xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);
                xmm_b = _mm_unpacklo_epi64 (xmm_b, xmm_a);
                xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh);
                xmm_pix4 = _mm_srli_epi32 (xmm_a, 7 * 2);
            } while (0);
            xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2);
            xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4);
            xmm_src = _mm_packus_epi16 (xmm_pix1, xmm_pix3);
        } while (0);
5839
5840 if (!is_zero (xmm_src))
5841 {
5842 if (is_opaque (xmm_src))
5843 {
5844 save_128_aligned ((__m128i *)dst, xmm_src);
5845 }
5846 else
5847 {
5848 __m128i xmm_dst = load_128_aligned ((__m128i *)dst);
5849
5850 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5851 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5852
5853 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
5854 over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi,
5855 &xmm_dst_lo, &xmm_dst_hi);
5856
5857 save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5858 }
5859 }
5860
5861 w -= 4;
5862 dst += 4;
5863 }
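The is_zero () / is_opaque () tests above are the two classic OVER shortcuts applied to a batch of four pixels: an all-zero source leaves the destination untouched, an all-opaque source is a plain aligned store, and only mixed alpha pays for unpacking and blending. Per channel the operator on premultiplied pixels is dst' = src + dst * (255 - alpha(src)) / 255; a standalone scalar sketch of the same effect (our model of what core_combine_over_u_pixel_sse2 computes, not its SSE2 code path) is:

    #include <stdint.h>

    /* Scalar model of OVER on one premultiplied ARGB32 pixel.  Assumption:
     * this mirrors the effect, not the implementation, of
     * core_combine_over_u_pixel_sse2, with simple rounded division. */
    static uint32_t
    over_pixel_scalar (uint32_t src, uint32_t dst)
    {
        uint32_t a = src >> 24;                 /* source alpha             */
        uint32_t r = 0;

        if (a == 0xff)                          /* opaque: plain overwrite  */
            return src;
        if (src == 0)                           /* transparent: keep dst    */
            return dst;

        for (int c = 0; c < 32; c += 8)
        {
            uint32_t s = (src >> c) & 0xff;
            uint32_t d = (dst >> c) & 0xff;
            uint32_t v = s + (d * (255 - a) + 127) / 255;   /* rounded scale */

            if (v > 0xff)                       /* saturate, like the SIMD adds */
                v = 0xff;
            r |= v << c;
        }
        return r;
    }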
5864
5865 while (w)
5866 {
5867 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
     /* expands to: */
     do {
         __m128i xmm_pix;
         do {
             __m128i xmm_wh, xmm_a, xmm_b;
             __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
             __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);
             (void)xmm_ux4;
             vx += unit_x;
             xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);
             xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);
             xmm_a = _mm_add_epi16 (xmm_a, xmm_b);
             xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7));
             xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);
             xmm_b = _mm_unpacklo_epi64 (xmm_b, xmm_a);
             xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh);
             xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2);
         } while (0);
         xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix);
         xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix);
         pix1 = _mm_cvtsi128_si32 (xmm_pix);
     } while (0);
5868
5869 if (pix1)
5870 {
5871 pix2 = *dst;
5872 *dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
5873 }
5874
5875 w--;
5876 dst++;
5877 }
5878 }
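The scanline function follows the usual three-phase SIMD pattern: a scalar head loop (lines 5818-5830) until dst reaches a 16-byte boundary, an aligned four-pixel SSE2 body (lines 5832-5863), and a scalar tail (lines 5865-5877). Because each pixel is 4 bytes, the head loop runs at most three iterations; a small self-contained check of that arithmetic (illustrative code of ours, not pixman's):

    #include <stdint.h>
    #include <stdio.h>

    /* How many 4-byte pixels must the scalar head loop emit before a pointer
     * with the given 16-byte misalignment becomes aligned?  Mirrors the
     * `while (w && ((uintptr_t)dst & 15))` condition above. */
    static unsigned
    head_pixels (uintptr_t p)
    {
        return (unsigned) (((16 - (p & 15)) & 15) / 4);
    }

    int
    main (void)
    {
        for (uintptr_t p = 0; p <= 12; p += 4)
            printf ("misalignment %2u -> %u head pixels\n",
                    (unsigned) (p & 15), head_pixels (p));
        return 0;   /* prints 0 -> 0, 4 -> 3, 8 -> 2, 12 -> 1 */
    }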
5879
5880 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
     /* expands to: */
     static void
     fast_composite_scaled_bilinear_sse2_8888_8888_cover_OVER (pixman_implementation_t *imp,
                                                               pixman_composite_info_t *info)
     {
         __attribute__((unused)) pixman_op_t op = info->op;
         __attribute__((unused)) pixman_image_t *src_image = info->src_image;
         __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
         __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
         __attribute__((unused)) int32_t src_x = info->src_x;
         __attribute__((unused)) int32_t src_y = info->src_y;
         __attribute__((unused)) int32_t mask_x = info->mask_x;
         __attribute__((unused)) int32_t mask_y = info->mask_y;
         __attribute__((unused)) int32_t dest_x = info->dest_x;
         __attribute__((unused)) int32_t dest_y = info->dest_y;
         __attribute__((unused)) int32_t width = info->width;
         __attribute__((unused)) int32_t height = info->height;
         uint32_t *dst_line;
         uint32_t *mask_line;
         uint32_t *src_first_line;
         int y1, y2;
         pixman_fixed_t max_vx = (2147483647);
         pixman_vector_t v;
         pixman_fixed_t vx, vy;
         pixman_fixed_t unit_x, unit_y;
         int32_t left_pad, left_tz, right_tz, right_pad;
         uint32_t *dst;
         uint32_t solid_mask;
         const uint32_t *mask = &solid_mask;
         int src_stride, mask_stride, dst_stride;
         int src_width;
         pixman_fixed_t src_width_fixed;
         int max_x;
         pixman_bool_t need_src_extension;

         do {
             uint32_t *__bits__;
             int __stride__;
             __bits__ = dest_image->bits.bits;
             __stride__ = dest_image->bits.rowstride;
             (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
             (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
         } while (0);
         if ((0) & (1 << 1)) {
             solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
             mask_stride = 0;
         } else if ((0) & (1 << 2)) {
             do {
                 uint32_t *__bits__;
                 int __stride__;
                 __bits__ = mask_image->bits.bits;
                 __stride__ = mask_image->bits.rowstride;
                 (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
                 (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
             } while (0);
         }
         do {
             uint32_t *__bits__;
             int __stride__;
             __bits__ = src_image->bits.bits;
             __stride__ = src_image->bits.rowstride;
             (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
             (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
         } while (0);
         v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));
         if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
             return;
         unit_x = src_image->common.transform->matrix[0][0];
         unit_y = src_image->common.transform->matrix[1][1];
         v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[1] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         vy = v.vector[1];
         if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) {
             bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                      &left_pad, &left_tz, &width, &right_tz, &right_pad);
             if (-1 == PIXMAN_REPEAT_PAD) {
                 left_pad += left_tz;
                 right_pad += right_tz;
                 left_tz = right_tz = 0;
             }
             v.vector[0] += left_pad * unit_x;
         }
         if (-1 == PIXMAN_REPEAT_NORMAL) {
             vx = v.vector[0];
             repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16)));
             max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1;
             if (src_image->bits.width < 64) {
                 src_width = 0;
                 while (src_width < 64 && src_width <= max_x)
                     src_width += src_image->bits.width;
                 need_src_extension = 1;
             } else {
                 src_width = src_image->bits.width;
                 need_src_extension = 0;
             }
             src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width) << 16));
         }
         while (--height >= 0) {
             int weight1, weight2;
             dst = dst_line;
             dst_line += dst_stride;
             vx = v.vector[0];
             if ((0) & (1 << 2)) {
                 mask = mask_line;
                 mask_line += mask_stride;
             }
             y1 = ((int) ((vy) >> 16));
             weight2 = pixman_fixed_to_bilinear_weight (vy);
             if (weight2) {
                 y2 = y1 + 1;
                 weight1 = (1 << 7) - weight2;
             } else {
                 y2 = y1;
                 weight1 = weight2 = (1 << 7) / 2;
             }
             vy += unit_y;
             if (-1 == PIXMAN_REPEAT_PAD) {
                 uint32_t *src1, *src2;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
                 repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
                 src1 = src_first_line + src_stride * y1;
                 src2 = src_first_line + src_stride * y2;
                 if (left_pad > 0) {
                     buf1[0] = buf1[1] = src1[0];
                     buf2[0] = buf2[1] = src2[0];
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad,
                                                                   weight1, weight2, 0, 0, 0, 0);
                     dst += left_pad;
                     if ((0) & (1 << 2)) mask += left_pad;
                 }
                 if (width > 0) {
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width,
                                                                   weight1, weight2, vx, unit_x, 0, 0);
                     dst += width;
                     if ((0) & (1 << 2)) mask += width;
                 }
                 if (right_pad > 0) {
                     buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                     buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad,
                                                                   weight1, weight2, 0, 0, 0, 0);
                 }
             } else if (-1 == PIXMAN_REPEAT_NONE) {
                 uint32_t *src1, *src2;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 if (y1 < 0) { weight1 = 0; y1 = 0; }
                 if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
                 if (y2 < 0) { weight2 = 0; y2 = 0; }
                 if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
                 src1 = src_first_line + src_stride * y1;
                 src2 = src_first_line + src_stride * y2;
                 if (left_pad > 0) {
                     buf1[0] = buf1[1] = 0;
                     buf2[0] = buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad,
                                                                   weight1, weight2, 0, 0, 0, 1);
                     dst += left_pad;
                     if ((0) & (1 << 2)) mask += left_pad;
                 }
                 if (left_tz > 0) {
                     buf1[0] = 0;
                     buf1[1] = src1[0];
                     buf2[0] = 0;
                     buf2[1] = src2[0];
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2,
                         ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                     dst += left_tz;
                     if ((0) & (1 << 2)) mask += left_tz;
                     vx += left_tz * unit_x;
                 }
                 if (width > 0) {
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width,
                                                                   weight1, weight2, vx, unit_x, 0, 0);
                     dst += width;
                     if ((0) & (1 << 2)) mask += width;
                     vx += width * unit_x;
                 }
                 if (right_tz > 0) {
                     buf1[0] = src1[src_image->bits.width - 1];
                     buf1[1] = 0;
                     buf2[0] = src2[src_image->bits.width - 1];
                     buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2,
                         ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                     dst += right_tz;
                     if ((0) & (1 << 2)) mask += right_tz;
                 }
                 if (right_pad > 0) {
                     buf1[0] = buf1[1] = 0;
                     buf2[0] = buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad,
                                                                   weight1, weight2, 0, 0, 0, 1);
                 }
             } else if (-1 == PIXMAN_REPEAT_NORMAL) {
                 int32_t num_pixels;
                 int32_t width_remain;
                 uint32_t *src_line_top;
                 uint32_t *src_line_bottom;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 uint32_t extended_src_line0[64*2];
                 uint32_t extended_src_line1[64*2];
                 int i, j;
                 repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
                 repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
                 src_line_top = src_first_line + src_stride * y1;
                 src_line_bottom = src_first_line + src_stride * y2;
                 if (need_src_extension) {
                     for (i = 0; i < src_width;) {
                         for (j = 0; j < src_image->bits.width; j++, i++) {
                             extended_src_line0[i] = src_line_top[j];
                             extended_src_line1[i] = src_line_bottom[j];
                         }
                     }
                     src_line_top = &extended_src_line0[0];
                     src_line_bottom = &extended_src_line1[0];
                 }
                 buf1[0] = src_line_top[src_width - 1];
                 buf1[1] = src_line_top[0];
                 buf2[0] = src_line_bottom[src_width - 1];
                 buf2[1] = src_line_bottom[0];
                 width_remain = width;
                 while (width_remain > 0) {
                     repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                     if (((int) ((vx) >> 16)) == src_width - 1) {
                         num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                         if (num_pixels > width_remain)
                             num_pixels = width_remain;
                         scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2,
                             ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, src_width_fixed, 0);
                         width_remain -= num_pixels;
                         vx += num_pixels * unit_x;
                         dst += num_pixels;
                         if ((0) & (1 << 2)) mask += num_pixels;
                         repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                     }
                     if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) {
                         num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                         if (num_pixels > width_remain)
                             num_pixels = width_remain;
                         scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels,
                                                                       weight1, weight2, vx, unit_x, src_width_fixed, 0);
                         width_remain -= num_pixels;
                         vx += num_pixels * unit_x;
                         dst += num_pixels;
                         if ((0) & (1 << 2)) mask += num_pixels;
                     }
                 }
             } else {
                 scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask,
                     src_first_line + src_stride * y1, src_first_line + src_stride * y2,
                     width, weight1, weight2, vx, unit_x, max_vx, 0);
             }
         }
     }
5881 scaled_bilinear_scanline_sse2_8888_8888_OVER,
5882 uint32_t, uint32_t, uint32_t,
5883 COVER, FLAG_NONE)
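In this cover variant the macro's repeat argument expands to -1, so every `-1 == PIXMAN_REPEAT_*` test above is compile-time false and each scanline falls through to the final else, which hands the whole row to scaled_bilinear_scanline_sse2_8888_8888_OVER. What survives per row is the 16.16 fixed-point bookkeeping: y1 = vy >> 16, weight2 = pixman_fixed_to_bilinear_weight (vy), weight1 = 128 - weight2, with a 64/64 split pinned to a single row when the fraction's top bits are zero. A standalone demonstration of that arithmetic (our code; fixed_to_bilinear_weight re-derives the pixman helper under the 7-bit-interpolation assumption):

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;          /* 16.16 fixed point */

    /* Top 7 fractional bits of a 16.16 coordinate -- our re-derivation of
     * pixman_fixed_to_bilinear_weight with BILINEAR_INTERPOLATION_BITS == 7. */
    static int
    fixed_to_bilinear_weight (pixman_fixed_t f)
    {
        return (f >> (16 - 7)) & ((1 << 7) - 1);
    }

    int
    main (void)
    {
        pixman_fixed_t vy = 0x0001C000;      /* y = 1.75 */
        int y1 = vy >> 16;
        int weight2 = fixed_to_bilinear_weight (vy);
        int y2, weight1;

        if (weight2) { y2 = y1 + 1; weight1 = 128 - weight2; }
        else         { y2 = y1;     weight1 = weight2 = 64;  }

        /* Expect y1=1 y2=2 weight1=32 weight2=96 (0.25/0.75 of 128). */
        printf ("y1=%d y2=%d weight1=%d weight2=%d\n", y1, y2, weight1, weight2);
        return 0;
    }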
5884 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
     /* expands to: */
     static void
     fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER (pixman_implementation_t *imp,
                                                             pixman_composite_info_t *info)
     {
         __attribute__((unused)) pixman_op_t op = info->op;
         __attribute__((unused)) pixman_image_t *src_image = info->src_image;
         __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
         __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
         __attribute__((unused)) int32_t src_x = info->src_x;
         __attribute__((unused)) int32_t src_y = info->src_y;
         __attribute__((unused)) int32_t mask_x = info->mask_x;
         __attribute__((unused)) int32_t mask_y = info->mask_y;
         __attribute__((unused)) int32_t dest_x = info->dest_x;
         __attribute__((unused)) int32_t dest_y = info->dest_y;
         __attribute__((unused)) int32_t width = info->width;
         __attribute__((unused)) int32_t height = info->height;
         uint32_t *dst_line;
         uint32_t *mask_line;
         uint32_t *src_first_line;
         int y1, y2;
         pixman_fixed_t max_vx = (2147483647);
         pixman_vector_t v;
         pixman_fixed_t vx, vy;
         pixman_fixed_t unit_x, unit_y;
         int32_t left_pad, left_tz, right_tz, right_pad;
         uint32_t *dst;
         uint32_t solid_mask;
         const uint32_t *mask = &solid_mask;
         int src_stride, mask_stride, dst_stride;
         int src_width;
         pixman_fixed_t src_width_fixed;
         int max_x;
         pixman_bool_t need_src_extension;

         do {
             uint32_t *__bits__;
             int __stride__;
             __bits__ = dest_image->bits.bits;
             __stride__ = dest_image->bits.rowstride;
             (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
             (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
         } while (0);
         if ((0) & (1 << 1)) {
             solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
             mask_stride = 0;
         } else if ((0) & (1 << 2)) {
             do {
                 uint32_t *__bits__;
                 int __stride__;
                 __bits__ = mask_image->bits.bits;
                 __stride__ = mask_image->bits.rowstride;
                 (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
                 (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
             } while (0);
         }
         do {
             uint32_t *__bits__;
             int __stride__;
             __bits__ = src_image->bits.bits;
             __stride__ = src_image->bits.rowstride;
             (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
             (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
         } while (0);
         v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));
         if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
             return;
         unit_x = src_image->common.transform->matrix[0][0];
         unit_y = src_image->common.transform->matrix[1][1];
         v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[1] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         vy = v.vector[1];
         if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) {
             bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                      &left_pad, &left_tz, &width, &right_tz, &right_pad);
             if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) {
                 left_pad += left_tz;
                 right_pad += right_tz;
                 left_tz = right_tz = 0;
             }
             v.vector[0] += left_pad * unit_x;
         }
         if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) {
             vx = v.vector[0];
             repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16)));
             max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1;
             if (src_image->bits.width < 64) {
                 src_width = 0;
                 while (src_width < 64 && src_width <= max_x)
                     src_width += src_image->bits.width;
                 need_src_extension = 1;
             } else {
                 src_width = src_image->bits.width;
                 need_src_extension = 0;
             }
             src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width) << 16));
         }
         while (--height >= 0) {
             int weight1, weight2;
             dst = dst_line;
             dst_line += dst_stride;
             vx = v.vector[0];
             if ((0) & (1 << 2)) {
                 mask = mask_line;
                 mask_line += mask_stride;
             }
             y1 = ((int) ((vy) >> 16));
             weight2 = pixman_fixed_to_bilinear_weight (vy);
             if (weight2) {
                 y2 = y1 + 1;
                 weight1 = (1 << 7) - weight2;
             } else {
                 y2 = y1;
                 weight1 = weight2 = (1 << 7) / 2;
             }
             vy += unit_y;
             if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) {
                 uint32_t *src1, *src2;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
                 repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
                 src1 = src_first_line + src_stride * y1;
                 src2 = src_first_line + src_stride * y2;
                 if (left_pad > 0) {
                     buf1[0] = buf1[1] = src1[0];
                     buf2[0] = buf2[1] = src2[0];
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad,
                                                                   weight1, weight2, 0, 0, 0, 0);
                     dst += left_pad;
                     if ((0) & (1 << 2)) mask += left_pad;
                 }
                 if (width > 0) {
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width,
                                                                   weight1, weight2, vx, unit_x, 0, 0);
                     dst += width;
                     if ((0) & (1 << 2)) mask += width;
                 }
                 if (right_pad > 0) {
                     buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                     buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad,
                                                                   weight1, weight2, 0, 0, 0, 0);
                 }
             } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) {
                 uint32_t *src1, *src2;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 if (y1 < 0) { weight1 = 0; y1 = 0; }
                 if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
                 if (y2 < 0) { weight2 = 0; y2 = 0; }
                 if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
                 src1 = src_first_line + src_stride * y1;
                 src2 = src_first_line + src_stride * y2;
                 if (left_pad > 0) {
                     buf1[0] = buf1[1] = 0;
                     buf2[0] = buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad,
                                                                   weight1, weight2, 0, 0, 0, 1);
                     dst += left_pad;
                     if ((0) & (1 << 2)) mask += left_pad;
                 }
                 if (left_tz > 0) {
                     buf1[0] = 0;
                     buf1[1] = src1[0];
                     buf2[0] = 0;
                     buf2[1] = src2[0];
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2,
                         ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                     dst += left_tz;
                     if ((0) & (1 << 2)) mask += left_tz;
                     vx += left_tz * unit_x;
                 }
                 if (width > 0) {
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width,
                                                                   weight1, weight2, vx, unit_x, 0, 0);
                     dst += width;
                     if ((0) & (1 << 2)) mask += width;
                     vx += width * unit_x;
                 }
                 if (right_tz > 0) {
                     buf1[0] = src1[src_image->bits.width - 1];
                     buf1[1] = 0;
                     buf2[0] = src2[src_image->bits.width - 1];
                     buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2,
                         ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                     dst += right_tz;
                     if ((0) & (1 << 2)) mask += right_tz;
                 }
                 if (right_pad > 0) {
                     buf1[0] = buf1[1] = 0;
                     buf2[0] = buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad,
                                                                   weight1, weight2, 0, 0, 0, 1);
                 }
             } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) {
                 int32_t num_pixels;
                 int32_t width_remain;
                 uint32_t *src_line_top;
                 uint32_t *src_line_bottom;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 uint32_t extended_src_line0[64*2];
                 uint32_t extended_src_line1[64*2];
                 int i, j;
                 repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
                 repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
                 src_line_top = src_first_line + src_stride * y1;
                 src_line_bottom = src_first_line + src_stride * y2;
                 if (need_src_extension) {
                     for (i = 0; i < src_width;) {
                         for (j = 0; j < src_image->bits.width; j++, i++) {
                             extended_src_line0[i] = src_line_top[j];
                             extended_src_line1[i] = src_line_bottom[j];
                         }
                     }
                     src_line_top = &extended_src_line0[0];
                     src_line_bottom = &extended_src_line1[0];
                 }
                 buf1[0] = src_line_top[src_width - 1];
                 buf1[1] = src_line_top[0];
                 buf2[0] = src_line_bottom[src_width - 1];
                 buf2[1] = src_line_bottom[0];
                 width_remain = width;
                 while (width_remain > 0) {
                     repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                     if (((int) ((vx) >> 16)) == src_width - 1) {
                         num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                         if (num_pixels > width_remain)
                             num_pixels = width_remain;
                         scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2,
                             ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, src_width_fixed, 0);
                         width_remain -= num_pixels;
                         vx += num_pixels * unit_x;
                         dst += num_pixels;
                         if ((0) & (1 << 2)) mask += num_pixels;
                         repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                     }
                     if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) {
                         num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                         if (num_pixels > width_remain)
                             num_pixels = width_remain;
                         scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels,
                                                                       weight1, weight2, vx, unit_x, src_width_fixed, 0);
                         width_remain -= num_pixels;
                         vx += num_pixels * unit_x;
                         dst += num_pixels;
                         if ((0) & (1 << 2)) mask += num_pixels;
                     }
                 }
             } else {
                 scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask,
                     src_first_line + src_stride * y1, src_first_line + src_stride * y2,
                     width, weight1, weight2, vx, unit_x, max_vx, 0);
             }
         }
     }
5885 scaled_bilinear_scanline_sse2_8888_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 <<
1)) { solid_mask = _pixman_image_get_solid (imp, mask_image,
dest_image->bits.format); mask_stride = 0; } else if ((0)
& (1 << 2)) { do { uint32_t *__bits__; int __stride__
; __bits__ = mask_image->bits.bits; __stride__ = mask_image
->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof
(uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while
(0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) *
(0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t
) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t
) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits
.width, v.vector[0], unit_x, &left_pad, &left_tz, &
width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD ==
PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz
; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x;
} if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector
[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t)
((uint32_t) (src_image->bits.width) << 16))); max_x
= ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16))
+ 1; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride
; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight
(vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) -
weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7
) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD
) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]
; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height
); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height
); src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad
; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
            }
            if (right_pad > 0) {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad,
                    weight1, weight2, 0, 0, 0, 0);
            }
        } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) {
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];
            if (y1 < 0) { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0) { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0) {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad,
                    weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (left_tz > 0) {
                buf1[0] = 0; buf1[1] = src1[0];
                buf2[0] = 0; buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz,
                    weight1, weight2,
                    ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))),
                    unit_x, 0, 0);
                dst += left_tz;
                if ((0) & (1 << 2)) mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0) {
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width,
                    weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0) {
                buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz,
                    weight1, weight2,
                    ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))),
                    unit_x, 0, 0);
                dst += right_tz;
                if ((0) & (1 << 2)) mask += right_tz;
            }
            if (right_pad > 0) {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad,
                    weight1, weight2, 0, 0, 0, 1);
            }
        } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) {
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t *src_line_top;
            uint32_t *src_line_bottom;
            uint32_t buf1[2], buf2[2];
            uint32_t extended_src_line0[64*2];
            uint32_t extended_src_line1[64*2];
            int i, j;
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;
            if (need_src_extension) {
                for (i = 0; i < src_width;) {
                    for (j = 0; j < src_image->bits.width; j++, i++) {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }
            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;
            while (width_remain > 0) {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if (((int) ((vx) >> 16)) == src_width - 1) {
                    num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels,
                        weight1, weight2,
                        ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))),
                        unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) {
                    num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16)))
                                   - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom,
                        num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                }
            }
        } else {
            scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask,
                src_first_line + src_stride * y1,
                src_first_line + src_stride * y2,
                width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
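Two idioms recur throughout these macro expansions. First, the mask handling is compiled out: every one of these mainloop instantiations passes FLAG_NONE, so each guard of the form if ((0) & (1 << 2)) is constant-false and the mask-pointer bookkeeping behind it is dead code the optimizer removes. Second, pixman_fixed_t is a 16.16 fixed-point type: vx >> 16 is the integer sample column, vx & (pixman_fixed_1 - 1) its fractional offset, and pixman_fixed_to_bilinear_weight keeps the top 7 fraction bits, which is why weight1 + weight2 always equals 1 << 7. A minimal self-contained sketch of those conventions (local stand-in names, not pixman's headers):

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t fixed_16_16;                      /* stand-in for pixman_fixed_t */
    #define FIXED_1         ((fixed_16_16) (1 << 16)) /* stand-in for pixman_fixed_1 */
    #define INT_TO_FIXED(i) ((fixed_16_16) ((uint32_t) (i) << 16))
    #define FIXED_TO_INT(f) ((int) ((f) >> 16))
    #define FIXED_FRAC(f)   ((f) & (FIXED_1 - 1))     /* the ((vx) & (fixed_1 - 1)) idiom */

    int main (void)
    {
        fixed_16_16 vx = INT_TO_FIXED (3) + FIXED_1 / 4;  /* x = 3.25 */
        int weight2 = FIXED_FRAC (vx) >> (16 - 7);        /* top 7 fraction bits */
        int weight1 = (1 << 7) - weight2;
        /* prints: column 3, frac 16384/65536, weights 96+32=128 */
        printf ("column %d, frac %d/65536, weights %d+%d=128\n",
                FIXED_TO_INT (vx), (int) FIXED_FRAC (vx), weight1, weight2);
        return 0;
    }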
5886 uint32_t, uint32_t, uint32_t,
5887 PAD, FLAG_NONE)
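Report lines 5886-5887 above are the trailing arguments of a multi-line macro invocation whose opening lines fall before this excerpt; by symmetry with the none and normal instantiations that follow, the full invocation producing fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER is presumably:

    FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
                                   scaled_bilinear_scanline_sse2_8888_8888_OVER,
                                   uint32_t, uint32_t, uint32_t,
                                   PAD, FLAG_NONE)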
5888 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,

static void
fast_composite_scaled_bilinear_sse2_8888_8888_none_OVER (pixman_implementation_t *imp,
                                                         pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t op = info->op;
    __attribute__((unused)) pixman_image_t *src_image = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t src_x = info->src_x;
    __attribute__((unused)) int32_t src_y = info->src_y;
    __attribute__((unused)) int32_t mask_x = info->mask_x;
    __attribute__((unused)) int32_t mask_y = info->mask_y;
    __attribute__((unused)) int32_t dest_x = info->dest_x;
    __attribute__((unused)) int32_t dest_y = info->dest_y;
    __attribute__((unused)) int32_t width = info->width;
    __attribute__((unused)) int32_t height = info->height;
    uint32_t *dst_line;
    uint32_t *mask_line;
    uint32_t *src_first_line;
    int y1, y2;
    pixman_fixed_t max_vx = (2147483647);
    pixman_vector_t v;
    pixman_fixed_t vx, vy;
    pixman_fixed_t unit_x, unit_y;
    int32_t left_pad, left_tz, right_tz, right_pad;
    uint32_t *dst;
    uint32_t solid_mask;
    const uint32_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    int src_width;
    pixman_fixed_t src_width_fixed;
    int max_x;
    pixman_bool_t need_src_extension;

    do {
        uint32_t *__bits__;
        int __stride__;
        __bits__ = dest_image->bits.bits;
        __stride__ = dest_image->bits.rowstride;
        (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
    } while (0);

    if ((0) & (1 << 1)) {
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        mask_stride = 0;
    } else if ((0) & (1 << 2)) {
        do {
            uint32_t *__bits__;
            int __stride__;
            __bits__ = mask_image->bits.bits;
            __stride__ = mask_image->bits.rowstride;
            (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
            (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
        } while (0);
    }

    do {
        uint32_t *__bits__;
        int __stride__;
        __bits__ = src_image->bits.bits;
        __stride__ = src_image->bits.rowstride;
        (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
    } while (0);

    v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));

    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;

    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    vy = v.vector[1];

    if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) {
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                 &left_pad, &left_tz, &width, &right_tz, &right_pad);
        if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) {
            left_pad += left_tz;
            right_pad += right_tz;
            left_tz = right_tz = 0;
        }
        v.vector[0] += left_pad * unit_x;
    }

    if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) {
        vx = v.vector[0];
        repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16)));
        max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1;
        if (src_image->bits.width < 64) {
            src_width = 0;
            while (src_width < 64 && src_width <= max_x)
                src_width += src_image->bits.width;
            need_src_extension = 1;
        } else {
            src_width = src_image->bits.width;
            need_src_extension = 0;
        }
        src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width) << 16));
    }

    while (--height >= 0) {
        int weight1, weight2;
        dst = dst_line;
        dst_line += dst_stride;
        vx = v.vector[0];
        if ((0) & (1 << 2)) {
            mask = mask_line;
            mask_line += mask_stride;
        }
        y1 = ((int) ((vy) >> 16));
        weight2 = pixman_fixed_to_bilinear_weight (vy);
        if (weight2) {
            y2 = y1 + 1;
            weight1 = (1 << 7) - weight2;
        } else {
            y2 = y1;
            weight1 = weight2 = (1 << 7) / 2;
        }
        vy += unit_y;

        if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) {
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];
            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0) {
                buf1[0] = buf1[1] = src1[0];
                buf2[0] = buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad,
                    weight1, weight2, 0, 0, 0, 0);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (width > 0) {
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width,
                    weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
            }
            if (right_pad > 0) {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad,
                    weight1, weight2, 0, 0, 0, 0);
            }
        } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) {
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];
            if (y1 < 0) { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0) { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0) {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad,
                    weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((0) & (1 << 2)) mask += left_pad;
            }
            if (left_tz > 0) {
                buf1[0] = 0; buf1[1] = src1[0];
                buf2[0] = 0; buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz,
                    weight1, weight2,
                    ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))),
                    unit_x, 0, 0);
                dst += left_tz;
                if ((0) & (1 << 2)) mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0) {
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width,
                    weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((0) & (1 << 2)) mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0) {
                buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz,
                    weight1, weight2,
                    ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))),
                    unit_x, 0, 0);
                dst += right_tz;
                if ((0) & (1 << 2)) mask += right_tz;
            }
            if (right_pad > 0) {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad,
                    weight1, weight2, 0, 0, 0, 1);
            }
        } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) {
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t *src_line_top;
            uint32_t *src_line_bottom;
            uint32_t buf1[2], buf2[2];
            uint32_t extended_src_line0[64*2];
            uint32_t extended_src_line1[64*2];
            int i, j;
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;
            if (need_src_extension) {
                for (i = 0; i < src_width;) {
                    for (j = 0; j < src_image->bits.width; j++, i++) {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }
            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;
            while (width_remain > 0) {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if (((int) ((vx) >> 16)) == src_width - 1) {
                    num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels,
                        weight1, weight2,
                        ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))),
                        unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) {
                    num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16)))
                                   - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom,
                        num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((0) & (1 << 2)) mask += num_pixels;
                }
            }
        } else {
            scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask,
                src_first_line + src_stride * y1,
                src_first_line + src_stride * y2,
                width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
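The NONE-repeat body above slices every output scanline into five runs before calling the SSE2 scanline routine: left_pad (both bilinear taps lie left of the source, so the output is transparent), left_tz (a transition zone straddling the left edge, interpolated against a zeroed buffer), the interior width, and the symmetric right_tz and right_pad runs. PAD repeat instead folds the transition zones into the pads (the left_pad += left_tz fixup above), since its edge pixels clamp rather than fade to zero. The bounds come from bilinear_pad_repeat_get_scanline_bounds; a simplified per-pixel classification that yields the same five counts (a sketch, not pixman's closed-form implementation):

    #include <stdint.h>

    typedef int32_t fixed_16_16;            /* 16.16 fixed point, as above */

    static void
    get_scanline_bounds (int32_t src_width, fixed_16_16 vx, fixed_16_16 unit_x,
                         int32_t width, int32_t *left_pad, int32_t *left_tz,
                         int32_t *center, int32_t *right_tz, int32_t *right_pad)
    {
        *left_pad = *left_tz = *center = *right_tz = *right_pad = 0;
        for (int32_t i = 0; i < width; i++, vx += unit_x) {
            int32_t x1 = vx >> 16;          /* left bilinear tap  */
            int32_t x2 = x1 + 1;            /* right bilinear tap */
            if (x2 < 0)
                ++*left_pad;                /* both taps before the first column */
            else if (x1 < 0)
                ++*left_tz;                 /* straddles the left edge           */
            else if (x2 < src_width)
                ++*center;                  /* both taps inside the source       */
            else if (x1 < src_width)
                ++*right_tz;                /* straddles the right edge          */
            else
                ++*right_pad;               /* both taps past the last column    */
        }
    }

For monotonically increasing vx (unit_x > 0) these classes form five contiguous runs, which is what lets the generated code emit one scanline call per run.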
5889 scaled_bilinear_scanline_sse2_8888_8888_OVER,
5890 uint32_t, uint32_t, uint32_t,
5891 NONE, FLAG_NONE)
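The normal_OVER expansion that follows tiles the source horizontally. Its scanline helper wants a reasonably wide row to iterate over, so when the source image is narrower than 64 pixels the row pair is first replicated into the on-stack extended_src_line0/1 buffers (64*2 entries each) until the working width src_width is a whole multiple of the image width that reaches 64 pixels or covers max_x. A sketch of that extension step, assuming the literal 64 corresponds to pixman's REPEAT_NORMAL_MIN_WIDTH:

    #include <stdint.h>

    #define REPEAT_NORMAL_MIN_WIDTH 64   /* assumed name for the literal 64 above */

    /* Tile one source row into a wider stack buffer; returns the extended
       width that the main loop then treats as the effective source width. */
    static int
    extend_source_line (const uint32_t *line, int line_width, int max_x,
                        uint32_t extended[REPEAT_NORMAL_MIN_WIDTH * 2])
    {
        int src_width = 0;
        while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) {
            for (int j = 0; j < line_width; j++)
                extended[src_width + j] = line[j];
            src_width += line_width;     /* line_width < 64, so src_width stays < 128 */
        }
        return src_width;
    }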
5892 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,

static void
fast_composite_scaled_bilinear_sse2_8888_8888_normal_OVER (pixman_implementation_t *imp,
                                                           pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t op = info->op;
    __attribute__((unused)) pixman_image_t *src_image = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t src_x = info->src_x;
    __attribute__((unused)) int32_t src_y = info->src_y;
    __attribute__((unused)) int32_t mask_x = info->mask_x;
    __attribute__((unused)) int32_t mask_y = info->mask_y;
    __attribute__((unused)) int32_t dest_x = info->dest_x;
    __attribute__((unused)) int32_t dest_y = info->dest_y;
    __attribute__((unused)) int32_t width = info->width;
    __attribute__((unused)) int32_t height = info->height;
    uint32_t *dst_line;
    uint32_t *mask_line;
    uint32_t *src_first_line;
    int y1, y2;
    pixman_fixed_t max_vx = (2147483647);
    pixman_vector_t v;
    pixman_fixed_t vx, vy;
    pixman_fixed_t unit_x, unit_y;
    int32_t left_pad, left_tz, right_tz, right_pad;
    uint32_t *dst;
    uint32_t solid_mask;
    const uint32_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    int src_width;
    pixman_fixed_t src_width_fixed;
    int max_x;
    pixman_bool_t need_src_extension;

    do {
        uint32_t *__bits__;
        int __stride__;
        __bits__ = dest_image->bits.bits;
        __stride__ = dest_image->bits.rowstride;
        (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
    } while (0);

    if ((0) & (1 << 1)) {
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        mask_stride = 0;
    } else if ((0) & (1 << 2)) {
        do {
            uint32_t *__bits__;
            int __stride__;
            __bits__ = mask_image->bits.bits;
            __stride__ = mask_image->bits.rowstride;
            (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
            (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
        } while (0);
    }

    do {
        uint32_t *__bits__;
        int __stride__;
        __bits__ = src_image->bits.bits;
        __stride__ = src_image->bits.rowstride;
        (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
    } while (0);

    v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));

    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;

    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    vy = v.vector[1];

    if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) {
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                 &left_pad, &left_tz, &width, &right_tz, &right_pad);
        if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) {
            left_pad += left_tz;
            right_pad += right_tz;
            left_tz = right_tz = 0;
        }
        v.vector[0] += left_pad * unit_x;
    }

    if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) {
        vx = v.vector[0];
        repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16)));
        max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1;
        if (src_image->bits.width < 64) {
            src_width = 0;
            while (src_width < 64 && src_width <= max_x)
                src_width += src_image->bits.width;
            need_src_extension = 1;
        } else {
            src_width = src_image->bits.width;
            need_src_extension = 0;
        }
        src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride
; } y1 = ((int) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight
(vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) -
weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7
) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD
) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]
; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height
); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height
); src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad
; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((0) & (1 << 2)) mask += width
; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image
->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image->
bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE
) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]
; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image
->bits.height) { weight1 = 0; y1 = src_image->bits.height
- 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image
->bits.height) { weight2 = 0; y2 = src_image->bits.height
- 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad
; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2
[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 <<
2)) mask += left_tz; vx += left_tz * unit_x; } if (width >
0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask
, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst
+= width; if ((0) & (1 << 2)) mask += width; vx +=
width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image
->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->
bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 <<
2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1
[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL
) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top
; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2
[2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1
[64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image
->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image
->bits.height); src_line_top = src_first_line + src_stride
* y1; src_line_bottom = src_first_line + src_stride * y2; if
(need_src_extension) { for (i=0; i<src_width;) { for (j=0
; j<src_image->bits.width; j++, i++) { extended_src_line0
[i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom
[j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom
= &extended_src_line1[0]; } buf1[0] = src_line_top[src_width
- 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width
- 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while
(width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx
, src_width_fixed); if (((int) ((vx) >> 16)) == src_width
- 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t
) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels
= width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((0) &
(1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL
, &vx, src_width_fixed); } if (((int) ((vx) >> 16))
!= src_width - 1 && width_remain > 0) { num_pixels
= ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) <<
16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels
> width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, src_line_top, src_line_bottom, num_pixels, weight1
, weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((0) &
(1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER
(dst, mask, src_first_line + src_stride * y1, src_first_line
+ src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx
, 0); } } }
5893 scaled_bilinear_scanline_sse2_8888_8888_OVER,
5894 uint32_t, uint32_t, uint32_t,
5895 NORMAL, FLAG_NONE)
5896
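The four argument lines above (5892-5895) form a single FAST_BILINEAR_MAINLOOP_COMMON invocation; its expansion is shown once, after line 5892. Per destination row, the generated mainloop turns the 16.16 fixed-point vertical coordinate vy into a pair of source rows plus two 7-bit blending weights, and for NORMAL repeat it replicates sources narrower than 64 pixels into a 128-entry scratch line so the scanline helper never has to wrap mid-run. Below is a minimal scalar sketch of that vertical setup, consistent with the (1 << 7) constants in the expansion; fixed_to_bilinear_weight and vertical_weights are illustrative names inferred from the code, not pixman API.

#include <stdint.h>

typedef int32_t pixman_fixed_t;

/* Assumed reading of pixman_fixed_to_bilinear_weight: keep the top
 * 7 bits of the 16-bit fraction, matching the (1 << 7) weight range
 * used by the expansion above. */
static int
fixed_to_bilinear_weight (pixman_fixed_t f)
{
    return (f >> (16 - 7)) & ((1 << 7) - 1);
}

/* Per-row vertical setup as performed by the expanded mainloop:
 * y1/y2 are the two source rows, w1/w2 their weights, w1 + w2 == 128. */
static void
vertical_weights (pixman_fixed_t vy, int *y1, int *y2, int *w1, int *w2)
{
    *y1 = vy >> 16;
    *w2 = fixed_to_bilinear_weight (vy);
    if (*w2)
    {
        *y2 = *y1 + 1;
        *w1 = (1 << 7) - *w2;
    }
    else
    {
        /* vy sits exactly on a row: split the weight evenly so the
         * single >> 14 normalization downstream still holds */
        *y2 = *y1;
        *w1 = *w2 = (1 << 7) / 2;
    }
}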
5897static force_inline __inline__ __attribute__ ((__always_inline__)) void
5898scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
5899 const uint8_t * mask,
5900 const uint32_t * src_top,
5901 const uint32_t * src_bottom,
5902 int32_t w,
5903 int wt,
5904 int wb,
5905 pixman_fixed_t vx_,
5906 pixman_fixed_t unit_x_,
5907 pixman_fixed_t max_vx,
5908 pixman_bool_t zero_src)
5909{
5910 intptr_t vx = vx_;
5911 intptr_t unit_x = unit_x_;
5912 BILINEAR_DECLARE_VARIABLES const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt,
wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb
, wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0,
1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 (
unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x
); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x
* 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x
* 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128
(); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx +
1), vx, -(vx + 1), vx, -(vx + 1))
;
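/* Reading of the declarations above: each pixel owns a 16-bit lane
 * pair (vx, -(vx + 1)) inside xmm_x. Because -(vx + 1) == ~vx, one
 * _mm_srli_epi16 by 16 - 7 plus the (0, 1) increments in xmm_addc
 * turns that pair into the complementary horizontal weights
 * (128 - w, w), ready for _mm_madd_epi16. xmm_ux1 and xmm_ux4
 * advance the lane pairs by one and four steps of unit_x. */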
5913 uint32_t pix1, pix2;
5914
5915 while (w && ((uintptr_t)dst & 15))
5916 {
5917 uint32_t sa;
5918 uint8_t m = *mask++;
5919
5920 if (m)
5921 {
5922 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i
tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16
]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix
, xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1
= _mm_cvtsi128_si32 (xmm_pix); } while(0)
;
5923 sa = pix1 >> 24;
5924
5925 if (sa == 0xff && m == 0xff)
5926 {
5927 *dst = pix1;
5928 }
5929 else
5930 {
5931 __m128i ms, md, ma, msa;
5932
5933 pix2 = *dst;
5934 ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
5935 ms = unpack_32_1x128 (pix1);
5936 md = unpack_32_1x128 (pix2);
5937
5938 msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
5939
5940 *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
5941 }
5942 }
5943 else
5944 {
5945 BILINEAR_SKIP_ONE_PIXEL () do { vx += unit_x; xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); } while
(0)
;
5946 }
5947
5948 w--;
5949 dst++;
5950 }
5951
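/* The loop above runs pixels one at a time only until dst reaches
 * 16-byte alignment, so the four-wide loop below can use aligned
 * 128-bit stores (save_128_aligned). That loop reads the next four
 * 8-bit mask values through memcpy into a uint32_t, which keeps the
 * load alignment-safe, and m == 0 lets it skip four pixels at once. */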
5952 while (w >= 4)
5953 {
5954 uint32_t m;
5955
5956 __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
5957 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5958 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
5959
5960 memcpy(&m, mask, sizeof(uint32_t));
5961
5962 if (m)
5963 {
5964 BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); do { __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; do { __m128i
xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i
*)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 *
2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr
= _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
__m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx
>> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b
; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >>
16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix3 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b
; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >>
16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix1 = _mm_packs_epi32 (xmm_pix1
, xmm_pix2); xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4);
xmm_src = _mm_packus_epi16 (xmm_pix1, xmm_pix3); } while(0)
;
5965
5966 if (m == 0xffffffff && is_opaque (xmm_src))
5967 {
5968 save_128_aligned ((__m128i *)dst, xmm_src);
5969 }
5970 else
5971 {
5972 xmm_dst = load_128_aligned ((__m128i *)dst);
5973
5974 xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
5975
5976 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5977 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
5978 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5979
5980 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
5981 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
5982
5983 in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
5984 &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
5985
5986 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5987 }
5988 }
5989 else
5990 {
5991 BILINEAR_SKIP_FOUR_PIXELS () do { vx += unit_x * 4; xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4)
; } while(0)
;
5992 }
5993
5994 w -= 4;
5995 dst += 4;
5996 mask += 4;
5997 }
5998
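/* The loop below is the scalar tail for the last w % 4 pixels and
 * repeats the per-pixel path of the head loop: a fully opaque source
 * pixel under a 0xff mask byte is stored directly, anything else goes
 * through the general in_over path (src IN mask, then OVER dst). */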
5999 while (w)
6000 {
6001 uint32_t sa;
6002 uint8_t m = *mask++;
6003
6004 if (m)
6005 {
6006 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i
tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16
]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix
, xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1
= _mm_cvtsi128_si32 (xmm_pix); } while(0)
;
6007 sa = pix1 >> 24;
6008
6009 if (sa == 0xff && m == 0xff)
6010 {
6011 *dst = pix1;
6012 }
6013 else
6014 {
6015 __m128i ms, md, ma, msa;
6016
6017 pix2 = *dst;
6018 ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
6019 ms = unpack_32_1x128 (pix1);
6020 md = unpack_32_1x128 (pix2);
6021
6022 msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
6023
6024 *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
6025 }
6026 }
6027 else
6028 {
6029 BILINEAR_SKIP_ONE_PIXEL () do { vx += unit_x; xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); } while
(0)
;
6030 }
6031
6032 w--;
6033 dst++;
6034 }
6035}
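Per sample, the BILINEAR_INTERPOLATE_* expansions above stay in 16-bit lanes throughout: _mm_mullo_epi16 blends the top and bottom texels with (wt, wb), _mm_madd_epi16 blends the left and right columns with (wl, wr), and a single shift by 14 removes both 7-bit weight factors. A scalar reference of that arithmetic, assuming 8-bit channels and weight pairs that each sum to 128; the helper is a sketch, not pixman API:

#include <stdint.h>

/* One bilinear sample: vertical blend with (wt, wb), horizontal blend
 * with (wl, wr), one >> 14 normalization, per 8-bit channel. */
static uint32_t
bilinear_sample_scalar (uint32_t tl, uint32_t tr,
                        uint32_t bl, uint32_t br,
                        unsigned wt, unsigned wb,
                        unsigned wl, unsigned wr)
{
    uint32_t out = 0;
    int c;

    for (c = 0; c < 32; c += 8)
    {
        unsigned left  = ((tl >> c) & 0xff) * wt + ((bl >> c) & 0xff) * wb;
        unsigned right = ((tr >> c) & 0xff) * wt + ((br >> c) & 0xff) * wb;
        /* each blend fits 16 bits (255 * 128); the sum fits 32 bits */
        out |= (((left * wl + right * wr) >> 14) & 0xff) << c;
    }
    return out;
}

The non-opaque store path then composites roughly as dst = src*m + dst*(255 - srca*m) per channel (IN followed by OVER), which is how the in_over_1x128 / in_over_2x128 calls above are used.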
6036
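In the COVER instantiation that follows, every repeat test compares a -1 sentinel (COVER needs no repeat handling) against a PIXMAN_REPEAT_* constant, so the pad/none/normal branches of the expansion are compile-time dead and only the trailing else survives: one full-width scanline call per row. A condensed sketch of that surviving loop, reusing the vertical_weights() helper sketched earlier; all other names come from the expansion itself:

while (--height >= 0)
{
    int y1, y2, weight1, weight2;

    dst = dst_line;   dst_line  += dst_stride;
    mask = mask_line; mask_line += mask_stride;
    vx = v.vector[0];

    vertical_weights (vy, &y1, &y2, &weight1, &weight2);
    vy += unit_y;

    scaled_bilinear_scanline_sse2_8888_8_8888_OVER
        (dst, mask,
         src_first_line + src_stride * y1,
         src_first_line + src_stride * y2,
         width, weight1, weight2, vx, unit_x, max_vx, 0);
}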
6037FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER, static void fast_composite_scaled_bilinear_sse2_8888_8_8888_cover_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2
) & (1 << 1)) { solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); mask_stride =
0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD
|| -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds
(src_image->bits.width, v.vector[0], unit_x, &left_pad
, &left_tz, &width, &right_tz, &right_pad); if
(-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad +=
right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad *
unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0]
; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((
uint32_t) (src_image->bits.width) << 16))); max_x = (
(int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1
; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((1 << 2) & (1 << 2)) { mask = mask_line; mask_line
+= mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 =
pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1
+ 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1
= weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD
) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]
; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height
); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height
); src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
0); dst += left_pad; if ((1 << 2) & (1 << 2)
) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((1 << 2) & (1 << 2)
) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] =
src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2
[src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1
, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) {
weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height
) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2
< 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->
bits.height) { weight2 = 0; y2 = src_image->bits.height - 1
; } src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
1); dst += left_pad; if ((1 << 2) & (1 << 2)
) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1
[1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) &
(1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if
(width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((1 << 2) & (1 << 2)
) mask += width; vx += width * unit_x; } if (right_tz > 0)
{ buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) &
(1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1
[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels
; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom
; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0
[64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat (
PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat
(PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top
= src_first_line + src_stride * y1; src_line_bottom = src_first_line
+ src_stride * y2; if (need_src_extension) { for (i=0; i<
src_width;) { for (j=0; j<src_image->bits.width; j++, i
++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1
[i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0
[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] =
src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2
[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom
[0]; width_remain = width; while (width_remain > 0) { repeat
(PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int)
((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed
- vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels >
width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((1 <<
2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL
, &vx, src_width_fixed); } if (((int) ((vx) >> 16))
!= src_width - 1 && width_remain > 0) { num_pixels
= ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) <<
16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels
> width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, src_line_top, src_line_bottom, num_pixels, weight1
, weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((1 <<
2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, src_first_line + src_stride * y1, src_first_line
+ src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx
, 0); } } }
6038                                scaled_bilinear_scanline_sse2_8888_8_8888_OVER,

Macro expansion (identical on every line of this invocation), reformatted:

static void
fast_composite_scaled_bilinear_sse2_8888_8_8888_cover_OVER (pixman_implementation_t *imp,
                                                            pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t     op         = info->op;
    __attribute__((unused)) pixman_image_t *src_image  = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t         src_x      = info->src_x;
    __attribute__((unused)) int32_t         src_y      = info->src_y;
    __attribute__((unused)) int32_t         mask_x     = info->mask_x;
    __attribute__((unused)) int32_t         mask_y     = info->mask_y;
    __attribute__((unused)) int32_t         dest_x     = info->dest_x;
    __attribute__((unused)) int32_t         dest_y     = info->dest_y;
    __attribute__((unused)) int32_t         width      = info->width;
    __attribute__((unused)) int32_t         height     = info->height;

    uint32_t       *dst_line;
    uint8_t        *mask_line;
    uint32_t       *src_first_line;
    int             y1, y2;
    pixman_fixed_t  max_vx = 2147483647;    /* INT32_MAX */
    pixman_vector_t v;
    pixman_fixed_t  vx, vy;
    pixman_fixed_t  unit_x, unit_y;
    int32_t         left_pad, left_tz, right_tz, right_pad;
    uint32_t       *dst;
    uint8_t         solid_mask;
    const uint8_t  *mask = &solid_mask;
    int             src_stride, mask_stride, dst_stride;
    int             src_width;
    pixman_fixed_t  src_width_fixed;
    int             max_x;
    pixman_bool_t   need_src_extension;

    /* Fetch the first destination line. */
    do {
        uint32_t *__bits__ = dest_image->bits.bits;
        int __stride__ = dest_image->bits.rowstride;
        dst_stride = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        dst_line = (uint32_t *) __bits__ + dst_stride * dest_y + 1 * dest_x;
    } while (0);

    if ((1 << 2) & (1 << 1)) {          /* solid-mask flag test: constant false here */
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        mask_stride = 0;
    } else if ((1 << 2) & (1 << 2)) {   /* non-solid-mask flag test: constant true */
        do {
            uint32_t *__bits__ = mask_image->bits.bits;
            int __stride__ = mask_image->bits.rowstride;
            mask_stride = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t);
            mask_line = (uint8_t *) __bits__ + mask_stride * mask_y + 1 * mask_x;
        } while (0);
    }

    do {
        uint32_t *__bits__ = src_image->bits.bits;
        int __stride__ = src_image->bits.rowstride;
        src_stride = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        src_first_line = (uint32_t *) __bits__ + src_stride * 0 + 1 * 0;
    } while (0);

    /* Transform the center of the first destination pixel into source space;
     * the (uint32_t) ... << 16 casts build 16.16 fixed-point values. */
    v.vector[0] = ((pixman_fixed_t) ((uint32_t) src_x << 16)) + ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) src_y << 16)) + ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    v.vector[2] = (pixman_fixed_t) ((uint32_t) 1 << 16);

    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;

    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    v.vector[1] -= ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    vy = v.vector[1];

    if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) {  /* constant false for COVER */
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                 &left_pad, &left_tz, &width, &right_tz, &right_pad);
        if (-1 == PIXMAN_REPEAT_PAD) {
            left_pad += left_tz;
            right_pad += right_tz;
            left_tz = right_tz = 0;
        }
        v.vector[0] += left_pad * unit_x;
    }

    if (-1 == PIXMAN_REPEAT_NORMAL) {                           /* constant false for COVER */
        vx = v.vector[0];
        repeat (PIXMAN_REPEAT_NORMAL, &vx, (pixman_fixed_t) ((uint32_t) src_image->bits.width << 16));
        max_x = ((int) ((vx + (width - 1) * (int64_t) unit_x) >> 16)) + 1;
        if (src_image->bits.width < 64) {
            src_width = 0;
            while (src_width < 64 && src_width <= max_x)
                src_width += src_image->bits.width;
            need_src_extension = 1;
        } else {
            src_width = src_image->bits.width;
            need_src_extension = 0;
        }
        src_width_fixed = (pixman_fixed_t) ((uint32_t) src_width << 16);
    }

    while (--height >= 0) {
        int weight1, weight2;

        dst = dst_line;
        dst_line += dst_stride;
        vx = v.vector[0];
        if ((1 << 2) & (1 << 2)) {      /* constant true: advance the 8-bit mask */
            mask = mask_line;
            mask_line += mask_stride;
        }

        y1 = (int) (vy >> 16);
        weight2 = pixman_fixed_to_bilinear_weight (vy);
        if (weight2) {
            y2 = y1 + 1;
            weight1 = (1 << 7) - weight2;
        } else {
            y2 = y1;
            weight1 = weight2 = (1 << 7) / 2;
        }
        vy += unit_y;

        if (-1 == PIXMAN_REPEAT_PAD) {              /* dead code in the COVER variant */
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];

            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;

            if (left_pad > 0) {
                buf1[0] = buf1[1] = src1[0];
                buf2[0] = buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2,
                    left_pad, weight1, weight2, 0, 0, 0, 0);
                dst += left_pad;
                if ((1 << 2) & (1 << 2))
                    mask += left_pad;
            }
            if (width > 0) {
                scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2,
                    width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((1 << 2) & (1 << 2))
                    mask += width;
            }
            if (right_pad > 0) {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2,
                    right_pad, weight1, weight2, 0, 0, 0, 0);
            }
        } else if (-1 == PIXMAN_REPEAT_NONE) {      /* dead code */
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];

            if (y1 < 0)                       { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0)                       { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;

            if (left_pad > 0) {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2,
                    left_pad, weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((1 << 2) & (1 << 2))
                    mask += left_pad;
            }
            if (left_tz > 0) {
                buf1[0] = 0;
                buf1[1] = src1[0];
                buf2[0] = 0;
                buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2,
                    left_tz, weight1, weight2,
                    vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1),  /* frac (vx) */
                    unit_x, 0, 0);
                dst += left_tz;
                if ((1 << 2) & (1 << 2))
                    mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0) {
                scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2,
                    width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((1 << 2) & (1 << 2))
                    mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0) {
                buf1[0] = src1[src_image->bits.width - 1];
                buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1];
                buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2,
                    right_tz, weight1, weight2,
                    vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1),
                    unit_x, 0, 0);
                dst += right_tz;
                if ((1 << 2) & (1 << 2))
                    mask += right_tz;
            }
            if (right_pad > 0) {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2,
                    right_pad, weight1, weight2, 0, 0, 0, 1);
            }
        } else if (-1 == PIXMAN_REPEAT_NORMAL) {    /* dead code */
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t *src_line_top;
            uint32_t *src_line_bottom;
            uint32_t buf1[2], buf2[2];
            uint32_t extended_src_line0[64 * 2];
            uint32_t extended_src_line1[64 * 2];
            int i, j;

            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;

            if (need_src_extension) {
                for (i = 0; i < src_width;) {
                    for (j = 0; j < src_image->bits.width; j++, i++) {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }

            /* buf1/buf2 hold the wrap-around pair (last pixel, first pixel). */
            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;

            while (width_remain > 0) {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if ((int) (vx >> 16) == src_width - 1) {
                    num_pixels = (src_width_fixed - vx - (pixman_fixed_t) 1) / unit_x + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2,
                        num_pixels, weight1, weight2,
                        vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1),
                        unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((1 << 2) & (1 << 2))
                        mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if ((int) (vx >> 16) != src_width - 1 && width_remain > 0) {
                    num_pixels = ((src_width_fixed - ((pixman_fixed_t) ((uint32_t) 1 << 16)) - vx - (pixman_fixed_t) 1) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask,
                        src_line_top, src_line_bottom, num_pixels, weight1, weight2,
                        vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((1 << 2) & (1 << 2))
                        mask += num_pixels;
                }
            }
        } else {                                    /* live path for COVER */
            scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask,
                src_first_line + src_stride * y1,
                src_first_line + src_stride * y2,
                width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
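pixman_fixed_to_bilinear_weight is not expanded in this listing. A minimal sketch of what such a conversion does, assuming the 7-bit interpolation precision implied by the (1 << 7) weight arithmetic above (the helper name and exact rounding here are assumptions, not the verified pixman source):

    #include <stdint.h>

    typedef int32_t pixman_fixed_t;         /* 16.16 fixed point */
    #define BILINEAR_INTERPOLATION_BITS 7   /* matches the (1 << 7) range above */

    /* Keep the top 7 bits of the fractional part of a 16.16 coordinate,
     * giving a bottom-row weight in [0, 127]; the top-row weight is then
     * 128 minus this value, as computed in the loop above. */
    static int
    fixed_to_bilinear_weight (pixman_fixed_t x)
    {
        return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
               ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
    }

A weight of zero means vy sits exactly on a scanline; the loop above then sets y2 = y1 and splits the weight evenly, so no second source row has to be read.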
6039                                uint32_t, uint8_t, uint32_t,
6040                                COVER, FLAG_HAVE_NON_SOLID_MASK)
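Since every repeat-mode test in the cover expansion compares against the placeholder constant -1, only the final else survives constant folding. As an illustrative reduction (not code that appears in pixman-sse2.c), the effective per-scanline work of this COVER variant is:

    /* Hypothetical helper, for exposition only: the COVER main loop after
     * the dead (-1 == PIXMAN_REPEAT_*) branches are folded away.  Types and
     * the scanline function are the ones from the expansion above. */
    static void
    cover_main_loop (uint32_t *dst_line, int dst_stride,
                     const uint8_t *mask_line, int mask_stride,
                     uint32_t *src_first_line, int src_stride,
                     int32_t width, int32_t height,
                     pixman_fixed_t vx, pixman_fixed_t vy,
                     pixman_fixed_t unit_x, pixman_fixed_t unit_y,
                     pixman_fixed_t max_vx)
    {
        while (--height >= 0) {
            uint32_t *dst = dst_line;
            const uint8_t *mask = mask_line;
            int y1 = (int) (vy >> 16);
            int weight2 = pixman_fixed_to_bilinear_weight (vy);
            int y2, weight1;

            if (weight2) {
                y2 = y1 + 1;                    /* blend rows y1 and y1 + 1 */
                weight1 = (1 << 7) - weight2;
            } else {
                y2 = y1;                        /* exactly on a row: equal halves */
                weight1 = weight2 = (1 << 7) / 2;
            }

            scaled_bilinear_scanline_sse2_8888_8_8888_OVER (
                dst, mask,
                src_first_line + src_stride * y1,
                src_first_line + src_stride * y2,
                width, weight1, weight2, vx, unit_x, max_vx, 0);

            dst_line  += dst_stride;
            mask_line += mask_stride;
            vy += unit_y;
        }
    }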
6041 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,

Macro expansion (identical on every line of this invocation): the same main-loop template as the cover expansion above, with every compile-time repeat-mode test now comparing PIXMAN_REPEAT_PAD instead of -1, so the scanline-bounds setup and the PAD branch become the live code:

static void
fast_composite_scaled_bilinear_sse2_8888_8_8888_pad_OVER (pixman_implementation_t *imp,
                                                          pixman_composite_info_t *info)
{
    /* Prologue identical to the cover variant above.  Because
     * PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD is now true,
     * bilinear_pad_repeat_get_scanline_bounds () runs, the transition-zone
     * counts are folded into left_pad/right_pad (left_tz = right_tz = 0),
     * and v.vector[0] is advanced past the left padding.  The
     * PIXMAN_REPEAT_NORMAL setup block stays dead. */

    while (--height >= 0) {
        /* y1/y2/weight computation exactly as in the cover variant, then
         * the live PIXMAN_REPEAT_PAD branch: */
        uint32_t *src1, *src2;
        uint32_t buf1[2], buf2[2];

        repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
        repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
        src1 = src_first_line + src_stride * y1;
        src2 = src_first_line + src_stride * y2;

        if (left_pad > 0) {
            /* Replicate the clamped left edge pixel. */
            buf1[0] = buf1[1] = src1[0];
            buf2[0] = buf2[1] = src2[0];
            scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2,
                left_pad, weight1, weight2, 0, 0, 0, 0);
            dst += left_pad;
            mask += left_pad;   /* the (1 << 2) & (1 << 2) mask guard is constant true */
        }
        if (width > 0) {
            scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2,
                width, weight1, weight2, vx, unit_x, 0, 0);
            dst += width;
            mask += width;
        }
        if (right_pad > 0) {
            /* Replicate the clamped right edge pixel. */
            buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
            buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
            scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2,
                right_pad, weight1, weight2, 0, 0, 0, 0);
        }
        /* The PIXMAN_REPEAT_NONE and PIXMAN_REPEAT_NORMAL branches and the
         * trailing else carry bodies identical to those in the cover
         * expansion above; their guards are constant false here, so they
         * are dead code in this variant. */
    }
}
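repeat () is likewise shown unexpanded in this listing. For the PAD arguments used above it clamps a row index into [0, size - 1]; a minimal sketch of that clamping, under the assumption that this is all the PAD mode does (pixman's real helper also implements the NONE, NORMAL and REFLECT modes and a different name/signature may apply):

    /* Sketch of the PAD-mode clamp performed by
     * repeat (PIXMAN_REPEAT_PAD, &y, size). */
    static void
    repeat_pad (int *coord, int size)
    {
        if (*coord < 0)
            *coord = 0;
        else if (*coord > size - 1)
            *coord = size - 1;
    }

After this clamp y1 and y2 are guaranteed in range, which is why the PAD branch can index src1[0] and src1[src_image->bits.width - 1] without further bounds checks.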
6042                                scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
6043                                uint32_t, uint8_t, uint32_t,
6044        PAD, FLAG_HAVE_NON_SOLID_MASK)
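For readers decoding the expansions in this report: the preprocessor has inlined pixman's 16.16 fixed-point helpers, so expressions such as ((pixman_fixed_t) ((uint32_t) (x) << 16)) and ((vx) & (pixman_fixed_1 - pixman_fixed_e)) appear spelled out. The macro definitions below restate the pixman.h helpers they came from; the small main() is only an illustrative sketch, not part of the analyzed source:

    /* 16.16 fixed point: 16 integer bits, 16 fraction bits. */
    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;

    #define pixman_int_to_fixed(i) ((pixman_fixed_t) ((uint32_t) (i) << 16))
    #define pixman_fixed_to_int(f) ((int) ((f) >> 16))
    #define pixman_fixed_e         ((pixman_fixed_t) 1)       /* smallest step */
    #define pixman_fixed_1         pixman_int_to_fixed (1)
    /* This is the "vx & (1<<16 - 1)" pattern seen throughout the expansions. */
    #define pixman_fixed_frac(f)   ((f) & (pixman_fixed_1 - pixman_fixed_e))

    int
    main (void)
    {
        /* 3.25 in 16.16 form is 0x00034000. */
        pixman_fixed_t vx = pixman_int_to_fixed (3) + pixman_fixed_1 / 4;

        /* prints "int 3, frac 0x4000" */
        printf ("int %d, frac 0x%04x\n",
                pixman_fixed_to_int (vx), (unsigned) pixman_fixed_frac (vx));
        return 0;
    }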
6045 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,

     /* expands to: */
     static void
     fast_composite_scaled_bilinear_sse2_8888_8_8888_none_OVER (pixman_implementation_t *imp,
                                                                pixman_composite_info_t *info)
     {
         __attribute__((unused)) pixman_op_t op = info->op;
         __attribute__((unused)) pixman_image_t *src_image = info->src_image;
         __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
         __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
         __attribute__((unused)) int32_t src_x = info->src_x;
         __attribute__((unused)) int32_t src_y = info->src_y;
         __attribute__((unused)) int32_t mask_x = info->mask_x;
         __attribute__((unused)) int32_t mask_y = info->mask_y;
         __attribute__((unused)) int32_t dest_x = info->dest_x;
         __attribute__((unused)) int32_t dest_y = info->dest_y;
         __attribute__((unused)) int32_t width = info->width;
         __attribute__((unused)) int32_t height = info->height;
         uint32_t *dst_line;
         uint8_t *mask_line;
         uint32_t *src_first_line;
         int y1, y2;
         pixman_fixed_t max_vx = (2147483647);
         pixman_vector_t v;
         pixman_fixed_t vx, vy;
         pixman_fixed_t unit_x, unit_y;
         int32_t left_pad, left_tz, right_tz, right_pad;
         uint32_t *dst;
         uint8_t solid_mask;
         const uint8_t *mask = &solid_mask;
         int src_stride, mask_stride, dst_stride;
         int src_width;
         pixman_fixed_t src_width_fixed;
         int max_x;
         pixman_bool_t need_src_extension;

         /* PIXMAN_IMAGE_GET_LINE: fetch destination bits and stride. */
         do {
             uint32_t *__bits__; int __stride__;
             __bits__ = dest_image->bits.bits;
             __stride__ = dest_image->bits.rowstride;
             (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
             (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
         } while (0);

         /* flags == FLAG_HAVE_NON_SOLID_MASK (1 << 2); the solid-mask branch
          * below is compile-time dead. */
         if ((1 << 2) & (1 << 1)) {
             solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
             mask_stride = 0;
         } else if ((1 << 2) & (1 << 2)) {
             do {
                 uint32_t *__bits__; int __stride__;
                 __bits__ = mask_image->bits.bits;
                 __stride__ = mask_image->bits.rowstride;
                 (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t);
                 (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
             } while (0);
         }
         do {
             uint32_t *__bits__; int __stride__;
             __bits__ = src_image->bits.bits;
             __stride__ = src_image->bits.rowstride;
             (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
             (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
         } while (0);

         /* Map the destination origin into source space (16.16 fixed point,
          * sampling at pixel centres). */
         v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));
         if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
             return;
         unit_x = src_image->common.transform->matrix[0][0];
         unit_y = src_image->common.transform->matrix[1][1];
         v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[1] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         vy = v.vector[1];

         /* Taken: this instance is compiled with repeat mode NONE. */
         if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) {
             bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                      &left_pad, &left_tz, &width, &right_tz, &right_pad);
             if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) {   /* dead for NONE */
                 left_pad += left_tz;
                 right_pad += right_tz;
                 left_tz = right_tz = 0;
             }
             v.vector[0] += left_pad * unit_x;
         }
         if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) {    /* dead for NONE */
             vx = v.vector[0];
             repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16)));
             max_x = ((int) ((vx + (width - 1) * (int64_t) unit_x) >> 16)) + 1;
             if (src_image->bits.width < 64) {
                 src_width = 0;
                 while (src_width < 64 && src_width <= max_x)
                     src_width += src_image->bits.width;
                 need_src_extension = 1;
             } else {
                 src_width = src_image->bits.width;
                 need_src_extension = 0;
             }
             src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width) << 16));
         }

         while (--height >= 0) {
             int weight1, weight2;

             dst = dst_line;
             dst_line += dst_stride;
             vx = v.vector[0];
             if ((1 << 2) & (1 << 2)) {   /* always true: non-solid mask */
                 mask = mask_line;
                 mask_line += mask_stride;
             }

             /* Split vy into source row y1 and 7-bit bilinear weights. */
             y1 = ((int) ((vy) >> 16));
             weight2 = pixman_fixed_to_bilinear_weight (vy);
             if (weight2) {
                 y2 = y1 + 1;
                 weight1 = (1 << 7) - weight2;
             } else {
                 y2 = y1;
                 weight1 = weight2 = (1 << 7) / 2;
             }
             vy += unit_y;

             if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) {   /* dead for NONE */
                 uint32_t *src1, *src2;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
                 repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
                 src1 = src_first_line + src_stride * y1;
                 src2 = src_first_line + src_stride * y2;
                 if (left_pad > 0) {
                     buf1[0] = buf1[1] = src1[0];
                     buf2[0] = buf2[1] = src2[0];
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                     dst += left_pad;
                     if ((1 << 2) & (1 << 2)) mask += left_pad;
                 }
                 if (width > 0) {
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                     dst += width;
                     if ((1 << 2) & (1 << 2)) mask += width;
                 }
                 if (right_pad > 0) {
                     buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                     buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
                 }
             } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) {   /* taken */
                 uint32_t *src1, *src2;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 /* Rows outside the source are clamped and given zero weight. */
                 if (y1 < 0) { weight1 = 0; y1 = 0; }
                 if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
                 if (y2 < 0) { weight2 = 0; y2 = 0; }
                 if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
                 src1 = src_first_line + src_stride * y1;
                 src2 = src_first_line + src_stride * y2;
                 if (left_pad > 0) {
                     buf1[0] = buf1[1] = 0;
                     buf2[0] = buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                     dst += left_pad;
                     if ((1 << 2) & (1 << 2)) mask += left_pad;
                 }
                 if (left_tz > 0) {
                     buf1[0] = 0; buf1[1] = src1[0];
                     buf2[0] = 0; buf2[1] = src2[0];
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2,
                         ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                     dst += left_tz;
                     if ((1 << 2) & (1 << 2)) mask += left_tz;
                     vx += left_tz * unit_x;
                 }
                 if (width > 0) {
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                     dst += width;
                     if ((1 << 2) & (1 << 2)) mask += width;
                     vx += width * unit_x;
                 }
                 if (right_tz > 0) {
                     buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
                     buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2,
                         ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                     dst += right_tz;
                     if ((1 << 2) & (1 << 2)) mask += right_tz;
                 }
                 if (right_pad > 0) {
                     buf1[0] = buf1[1] = 0;
                     buf2[0] = buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
                 }
             } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) {   /* dead for NONE */
                 int32_t num_pixels;
                 int32_t width_remain;
                 uint32_t *src_line_top;
                 uint32_t *src_line_bottom;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 uint32_t extended_src_line0[64*2];
                 uint32_t extended_src_line1[64*2];
                 int i, j;
                 repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
                 repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
                 src_line_top = src_first_line + src_stride * y1;
                 src_line_bottom = src_first_line + src_stride * y2;
                 if (need_src_extension) {
                     for (i = 0; i < src_width;) {
                         for (j = 0; j < src_image->bits.width; j++, i++) {
                             extended_src_line0[i] = src_line_top[j];
                             extended_src_line1[i] = src_line_bottom[j];
                         }
                     }
                     src_line_top = &extended_src_line0[0];
                     src_line_bottom = &extended_src_line1[0];
                 }
                 buf1[0] = src_line_top[src_width - 1];
                 buf1[1] = src_line_top[0];
                 buf2[0] = src_line_bottom[src_width - 1];
                 buf2[1] = src_line_bottom[0];
                 width_remain = width;
                 while (width_remain > 0) {
                     repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                     if (((int) ((vx) >> 16)) == src_width - 1) {
                         num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                         if (num_pixels > width_remain)
                             num_pixels = width_remain;
                         scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2,
                             ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, src_width_fixed, 0);
                         width_remain -= num_pixels;
                         vx += num_pixels * unit_x;
                         dst += num_pixels;
                         if ((1 << 2) & (1 << 2)) mask += num_pixels;
                         repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                     }
                     if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) {
                         num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                         if (num_pixels > width_remain)
                             num_pixels = width_remain;
                         scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels,
                             weight1, weight2, vx, unit_x, src_width_fixed, 0);
                         width_remain -= num_pixels;
                         vx += num_pixels * unit_x;
                         dst += num_pixels;
                         if ((1 << 2) & (1 << 2)) mask += num_pixels;
                     }
                 }
             } else {
                 scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask,
                     src_first_line + src_stride * y1, src_first_line + src_stride * y2,
                     width, weight1, weight2, vx, unit_x, max_vx, 0);
             }
         }
     }
6046        scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
6047        uint32_t, uint8_t, uint32_t,
6048        NONE, FLAG_HAVE_NON_SOLID_MASK)
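In every scanline iteration above, the vertical coordinate vy is split into a source row y1 and a pair of 7-bit bilinear weights. The following self-contained sketch restates that computation, assuming BILINEAR_INTERPOLATION_BITS == 7 (consistent with the literal 1 << 7 in the expansions) and the pixman-private.h form of pixman_fixed_to_bilinear_weight; vertical_weights is a hypothetical helper name used only for illustration:

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;

    #define BILINEAR_INTERPOLATION_BITS 7
    #define pixman_fixed_to_int(f) ((int) ((f) >> 16))
    /* Keep the top 7 bits of the 16-bit fraction. */
    #define pixman_fixed_to_bilinear_weight(x)            \
        (((x) >> (16 - BILINEAR_INTERPOLATION_BITS)) &    \
         ((1 << BILINEAR_INTERPOLATION_BITS) - 1))

    /* Mirrors the y1/y2/weight1/weight2 computation in the main loops above. */
    static void
    vertical_weights (pixman_fixed_t vy, int *y1, int *y2, int *weight1, int *weight2)
    {
        *y1 = pixman_fixed_to_int (vy);
        *weight2 = pixman_fixed_to_bilinear_weight (vy);
        if (*weight2)
        {
            /* Between two rows: blend y1 and y1 + 1. */
            *y2 = *y1 + 1;
            *weight1 = (1 << BILINEAR_INTERPOLATION_BITS) - *weight2;
        }
        else
        {
            /* Exactly on a row: read it twice with equal half weights. */
            *y2 = *y1;
            *weight1 = *weight2 = (1 << BILINEAR_INTERPOLATION_BITS) / 2;
        }
    }

    int
    main (void)
    {
        int y1, y2, w1, w2;
        vertical_weights (0x28000 /* vy == 2.5 */, &y1, &y2, &w1, &w2);
        printf ("rows %d/%d, weights %d/%d\n", y1, y2, w1, w2);  /* 2/3, 64/64 */
        return 0;
    }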
6049 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,

     /* expands to: */
     static void
     fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER (pixman_implementation_t *imp,
                                                                  pixman_composite_info_t *info)
     {
         __attribute__((unused)) pixman_op_t op = info->op;
         __attribute__((unused)) pixman_image_t *src_image = info->src_image;
         __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
         __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
         __attribute__((unused)) int32_t src_x = info->src_x;
         __attribute__((unused)) int32_t src_y = info->src_y;
         __attribute__((unused)) int32_t mask_x = info->mask_x;
         __attribute__((unused)) int32_t mask_y = info->mask_y;
         __attribute__((unused)) int32_t dest_x = info->dest_x;
         __attribute__((unused)) int32_t dest_y = info->dest_y;
         __attribute__((unused)) int32_t width = info->width;
         __attribute__((unused)) int32_t height = info->height;
         uint32_t *dst_line;
         uint8_t *mask_line;
         uint32_t *src_first_line;
         int y1, y2;
         pixman_fixed_t max_vx = (2147483647);
         pixman_vector_t v;
         pixman_fixed_t vx, vy;
         pixman_fixed_t unit_x, unit_y;
         int32_t left_pad, left_tz, right_tz, right_pad;
         uint32_t *dst;
         uint8_t solid_mask;
         const uint8_t *mask = &solid_mask;
         int src_stride, mask_stride, dst_stride;
         int src_width;
         pixman_fixed_t src_width_fixed;
         int max_x;
         pixman_bool_t need_src_extension;

         /* PIXMAN_IMAGE_GET_LINE: fetch destination bits and stride. */
         do {
             uint32_t *__bits__; int __stride__;
             __bits__ = dest_image->bits.bits;
             __stride__ = dest_image->bits.rowstride;
             (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
             (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
         } while (0);

         /* flags == FLAG_HAVE_NON_SOLID_MASK (1 << 2); the solid-mask branch
          * below is compile-time dead. */
         if ((1 << 2) & (1 << 1)) {
             solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
             mask_stride = 0;
         } else if ((1 << 2) & (1 << 2)) {
             do {
                 uint32_t *__bits__; int __stride__;
                 __bits__ = mask_image->bits.bits;
                 __stride__ = mask_image->bits.rowstride;
                 (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t);
                 (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
             } while (0);
         }
         do {
             uint32_t *__bits__; int __stride__;
             __bits__ = src_image->bits.bits;
             __stride__ = src_image->bits.rowstride;
             (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
             (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
         } while (0);

         /* Map the destination origin into source space (16.16 fixed point,
          * sampling at pixel centres). */
         v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));
         if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
             return;
         unit_x = src_image->common.transform->matrix[0][0];
         unit_y = src_image->common.transform->matrix[1][1];
         v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         v.vector[1] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
         vy = v.vector[1];

         /* Dead for NORMAL: scanline bounds are only needed for PAD/NONE. */
         if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) {
             bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                      &left_pad, &left_tz, &width, &right_tz, &right_pad);
             if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) {
                 left_pad += left_tz;
                 right_pad += right_tz;
                 left_tz = right_tz = 0;
             }
             v.vector[0] += left_pad * unit_x;
         }
         /* Taken for NORMAL: set up the (possibly extended) source width. */
         if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) {
             vx = v.vector[0];
             repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16)));
             max_x = ((int) ((vx + (width - 1) * (int64_t) unit_x) >> 16)) + 1;
             if (src_image->bits.width < 64) {
                 src_width = 0;
                 while (src_width < 64 && src_width <= max_x)
                     src_width += src_image->bits.width;
                 need_src_extension = 1;
             } else {
                 src_width = src_image->bits.width;
                 need_src_extension = 0;
             }
             src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width) << 16));
         }

         while (--height >= 0) {
             int weight1, weight2;

             dst = dst_line;
             dst_line += dst_stride;
             vx = v.vector[0];
             if ((1 << 2) & (1 << 2)) {   /* always true: non-solid mask */
                 mask = mask_line;
                 mask_line += mask_stride;
             }

             /* Split vy into source row y1 and 7-bit bilinear weights. */
             y1 = ((int) ((vy) >> 16));
             weight2 = pixman_fixed_to_bilinear_weight (vy);
             if (weight2) {
                 y2 = y1 + 1;
                 weight1 = (1 << 7) - weight2;
             } else {
                 y2 = y1;
                 weight1 = weight2 = (1 << 7) / 2;
             }
             vy += unit_y;

             if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) {   /* dead for NORMAL */
                 uint32_t *src1, *src2;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
                 repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
                 src1 = src_first_line + src_stride * y1;
                 src2 = src_first_line + src_stride * y2;
                 if (left_pad > 0) {
                     buf1[0] = buf1[1] = src1[0];
                     buf2[0] = buf2[1] = src2[0];
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                     dst += left_pad;
                     if ((1 << 2) & (1 << 2)) mask += left_pad;
                 }
                 if (width > 0) {
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                     dst += width;
                     if ((1 << 2) & (1 << 2)) mask += width;
                 }
                 if (right_pad > 0) {
                     buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                     buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
                 }
             } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) {   /* dead for NORMAL */
                 uint32_t *src1, *src2;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 if (y1 < 0) { weight1 = 0; y1 = 0; }
                 if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
                 if (y2 < 0) { weight2 = 0; y2 = 0; }
                 if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
                 src1 = src_first_line + src_stride * y1;
                 src2 = src_first_line + src_stride * y2;
                 if (left_pad > 0) {
                     buf1[0] = buf1[1] = 0;
                     buf2[0] = buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                     dst += left_pad;
                     if ((1 << 2) & (1 << 2)) mask += left_pad;
                 }
                 if (left_tz > 0) {
                     buf1[0] = 0; buf1[1] = src1[0];
                     buf2[0] = 0; buf2[1] = src2[0];
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2,
                         ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                     dst += left_tz;
                     if ((1 << 2) & (1 << 2)) mask += left_tz;
                     vx += left_tz * unit_x;
                 }
                 if (width > 0) {
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                     dst += width;
                     if ((1 << 2) & (1 << 2)) mask += width;
                     vx += width * unit_x;
                 }
                 if (right_tz > 0) {
                     buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
                     buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2,
                         ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                     dst += right_tz;
                     if ((1 << 2) & (1 << 2)) mask += right_tz;
                 }
                 if (right_pad > 0) {
                     buf1[0] = buf1[1] = 0;
                     buf2[0] = buf2[1] = 0;
                     scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
                 }
             } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) {   /* taken */
                 int32_t num_pixels;
                 int32_t width_remain;
                 uint32_t *src_line_top;
                 uint32_t *src_line_bottom;
                 uint32_t buf1[2];
                 uint32_t buf2[2];
                 uint32_t extended_src_line0[64*2];
                 uint32_t extended_src_line1[64*2];
                 int i, j;
                 repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
                 repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
                 src_line_top = src_first_line + src_stride * y1;
                 src_line_bottom = src_first_line + src_stride * y2;
                 /* Narrow sources are tiled into a 128-pixel scratch line. */
                 if (need_src_extension) {
                     for (i = 0; i < src_width;) {
                         for (j = 0; j < src_image->bits.width; j++, i++) {
                             extended_src_line0[i] = src_line_top[j];
                             extended_src_line1[i] = src_line_bottom[j];
                         }
                     }
                     src_line_top = &extended_src_line0[0];
                     src_line_bottom = &extended_src_line1[0];
                 }
                 buf1[0] = src_line_top[src_width - 1];
                 buf1[1] = src_line_top[0];
                 buf2[0] = src_line_bottom[src_width - 1];
                 buf2[1] = src_line_bottom[0];
                 width_remain = width;
                 while (width_remain > 0) {
                     repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                     if (((int) ((vx) >> 16)) == src_width - 1) {
                         /* Last column wraps to the first: use the 2-pixel buffers. */
                         num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                         if (num_pixels > width_remain)
                             num_pixels = width_remain;
                         scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2,
                             ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, src_width_fixed, 0);
                         width_remain -= num_pixels;
                         vx += num_pixels * unit_x;
                         dst += num_pixels;
                         if ((1 << 2) & (1 << 2)) mask += num_pixels;
                         repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                     }
                     if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) {
                         num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                         if (num_pixels > width_remain)
                             num_pixels = width_remain;
                         scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels,
                             weight1, weight2, vx, unit_x, src_width_fixed, 0);
                         width_remain -= num_pixels;
                         vx += num_pixels * unit_x;
                         dst += num_pixels;
                         if ((1 << 2) & (1 << 2)) mask += num_pixels;
                     }
                 }
             } else {
                 scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask,
                     src_first_line + src_stride * y1, src_first_line + src_stride * y2,
                     width, weight1, weight2, vx, unit_x, max_vx, 0);
             }
         }
     }
6050        scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image
->bits.height); src_line_top = src_first_line + src_stride
* y1; src_line_bottom = src_first_line + src_stride * y2; if
(need_src_extension) { for (i=0; i<src_width;) { for (j=0
; j<src_image->bits.width; j++, i++) { extended_src_line0
[i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom
[j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom
= &extended_src_line1[0]; } buf1[0] = src_line_top[src_width
- 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width
- 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while
(width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx
, src_width_fixed); if (((int) ((vx) >> 16)) == src_width
- 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t
) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels
= width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((1 <<
2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL
, &vx, src_width_fixed); } if (((int) ((vx) >> 16))
!= src_width - 1 && width_remain > 0) { num_pixels
= ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) <<
16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels
> width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, src_line_top, src_line_bottom, num_pixels, weight1
, weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((1 <<
2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER
(dst, mask, src_first_line + src_stride * y1, src_first_line
+ src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx
, 0); } } }
6051 uint32_t, uint8_t, uint32_t,
6052 NORMAL, FLAG_HAVE_NON_SOLID_MASK)
6053
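The function generated by this macro walks the destination image one scanline per iteration of the outer while loop: vy selects the two source rows y1 and y2 together with their 7-bit blend weights, the repeat mode fixed at expansion time (PAD, NONE or NORMAL) decides how columns outside the source are fed to the scanline worker, and for NORMAL repeat a source narrower than 64 pixels is tiled into extended_src_line0/1 so the inner loop never wraps mid-run. Conditionals such as ((1 << 2) & (1 << 2)) and (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) are flag and repeat-mode tests that became compile-time constants in the expansion, which is why only one branch of each group is live. A minimal scalar sketch of the vertical-weight step, transcribed from the expansion above (setup_vertical_weights is a hypothetical name, not part of pixman):

/* vy is 16.16 fixed point; the two weights always sum to 128 (1 << 7). */
static void
setup_vertical_weights (pixman_fixed_t vy,
                        int *y1, int *y2, int *weight1, int *weight2)
{
    *y1      = (int) (vy >> 16);                      /* integer source row  */
    *weight2 = pixman_fixed_to_bilinear_weight (vy);  /* top 7 fraction bits */
    if (*weight2)
    {
        *y2      = *y1 + 1;
        *weight1 = (1 << 7) - *weight2;
    }
    else
    {
        /* Exactly on a row: split evenly so wt + wb is still 128. */
        *y2      = *y1;
        *weight1 = *weight2 = (1 << 7) / 2;
    }
}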
6054static force_inline__inline__ __attribute__ ((__always_inline__)) void
6055scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst,
6056 const uint32_t * mask,
6057 const uint32_t * src_top,
6058 const uint32_t * src_bottom,
6059 int32_t w,
6060 int wt,
6061 int wb,
6062 pixman_fixed_t vx_,
6063 pixman_fixed_t unit_x_,
6064 pixman_fixed_t max_vx,
6065 pixman_bool_t zero_src)
6066{
6067 intptr_t vx = vx_;
6068 intptr_t unit_x = unit_x_;
6069 BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt,
wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb
, wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0,
1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 (
unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x
); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x
* 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x
* 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128
(); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx +
1), vx, -(vx + 1), vx, -(vx + 1))
;
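/* The interleaved lanes set up above are the core trick of this loop:
   xmm_x holds (vx, -(vx + 1)) pairs, so a single 16-bit logical shift
   right by (16 - 7), plus xmm_addc's (0, 1) pattern, yields both
   horizontal weights at once: frac = (vx >> 9) & 0x7f for the right
   texel and 128 - frac for the left one. xmm_ux1 advances these lanes
   by one unit_x step per interpolated pixel; xmm_ux4 is declared for a
   four-at-a-time variant but, as the (void)xmm_ux4 casts below show,
   this expansion still steps one pixel at a time. */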
6070 uint32_t pix1;
6071 __m128i xmm_mask;
6072
6073 if (zero_src || (*mask >> 24) == 0)
6074 return;
6075
6076 xmm_mask = create_mask_16_128 (*mask >> 24);
6077
6078 while (w && ((uintptr_t)dst & 15))
6079 {
6080 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i
tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16
]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix
, xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1
= _mm_cvtsi128_si32 (xmm_pix); } while(0)
;
6081 if (pix1)
6082 {
6083 uint32_t d = *dst;
6084
6085 __m128i ms = unpack_32_1x128 (pix1);
6086 __m128i alpha = expand_alpha_1x128 (ms);
6087 __m128i dest = xmm_mask;
6088 __m128i alpha_dst = unpack_32_1x128 (d);
6089
6090 *dst = pack_1x128_32
6091 (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
6092 }
6093
6094 dst++;
6095 w--;
6096 }
6097
6098 while (w >= 4)
6099 {
6100 __m128i xmm_src;
6101 BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); do { __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; do { __m128i
xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i
*)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 *
2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr
= _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
__m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx
>> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b
; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >>
16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix3 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b
; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >>
16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix1 = _mm_packs_epi32 (xmm_pix1
, xmm_pix2); xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4);
xmm_src = _mm_packus_epi16 (xmm_pix1, xmm_pix3); } while(0)
;
6102
6103 if (!is_zero (xmm_src))
6104 {
6105 __m128i xmm_src_lo, xmm_src_hi;
6106 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
6107 __m128i xmm_alpha_lo, xmm_alpha_hi;
6108
6109 xmm_dst = load_128_aligned ((__m128i*)dst);
6110
6111 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
6112 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
6113 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
6114 &xmm_alpha_lo, &xmm_alpha_hi);
6115
6116 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
6117 &xmm_alpha_lo, &xmm_alpha_hi,
6118 &xmm_mask, &xmm_mask,
6119 &xmm_dst_lo, &xmm_dst_hi);
6120
6121 save_128_aligned
6122 ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
6123 }
6124
6125 dst += 4;
6126 w -= 4;
6127 }
6128
6129 while (w)
6130 {
6131 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i
tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16
]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix
, xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1
= _mm_cvtsi128_si32 (xmm_pix); } while(0)
;
6132 if (pix1)
6133 {
6134 uint32_t d = *dst;
6135
6136 __m128i ms = unpack_32_1x128 (pix1);
6137 __m128i alpha = expand_alpha_1x128 (ms);
6138 __m128i dest = xmm_mask;
6139 __m128i alpha_dst = unpack_32_1x128 (d);
6140
6141 *dst = pack_1x128_32
6142 (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
6143 }
6144
6145 dst++;
6146 w--;
6147 }
6148}
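Each BILINEAR_INTERPOLATE_ONE_PIXEL / _FOUR_PIXELS expansion in the function above computes, per 8-bit channel, a two-stage weighted average of the four neighbouring texels: the top and bottom rows are first blended with the vertical weights, then the two column results are blended with the horizontal weights and the accumulated fraction bits are shifted out. A scalar model of that arithmetic (bilinear_interpolate_scalar is a hypothetical name; the real code handles all four channels of two pixels at once with SSE2):

/* wt + wb == 128 (vertical weights), wl + wr == 128 (horizontal weights). */
static uint32_t
bilinear_interpolate_scalar (uint32_t tl, uint32_t tr,
                             uint32_t bl, uint32_t br,
                             int wt, int wb, int wl, int wr)
{
    uint32_t result = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t left  = ((tl >> shift) & 0xff) * wt + ((bl >> shift) & 0xff) * wb;
        uint32_t right = ((tr >> shift) & 0xff) * wt + ((br >> shift) & 0xff) * wb;

        /* Two 7-bit weightings leave 14 fraction bits, matching the
           _mm_srli_epi32 (xmm_a, 7 * 2) at the end of each expansion. */
        result |= (((left * wl + right * wr) >> 14) & 0xff) << shift;
    }
    return result;
}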
6149
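The per-pixel compositing step in the scanline function above is in_over: the interpolated source is multiplied by the solid mask's alpha (the IN step, with xmm_mask built from *mask >> 24), and the result is then OVER-composited onto the destination. In the COVER expansion that follows, the repeat constant is -1, so every repeat branch folds away and only the final direct call to the scanline worker survives. A scalar sketch assuming pixman's usual premultiplied-alpha definitions (in_over_scalar and mul_div_255 are hypothetical names):

/* Rounding multiply a * b / 255, the usual pixman idiom. */
static inline uint32_t
mul_div_255 (uint32_t a, uint32_t b)
{
    uint32_t t = a * b + 0x80;
    return (t + (t >> 8)) >> 8;
}

/* dest = (src IN mask_alpha) OVER dest, per 8-bit premultiplied channel. */
static uint32_t
in_over_scalar (uint32_t src, uint32_t mask_alpha, uint32_t dst)
{
    uint32_t srca = mul_div_255 (src >> 24, mask_alpha);  /* alpha after IN */
    uint32_t out = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t s = mul_div_255 ((src >> shift) & 0xff, mask_alpha);
        uint32_t d = (dst >> shift) & 0xff;
        out |= (s + mul_div_255 (d, 255 - srca)) << shift;
    }
    return out;
}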
6150FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_cover_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1
) & (1 << 1)) { solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); mask_stride =
0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD
|| -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds
(src_image->bits.width, v.vector[0], unit_x, &left_pad
, &left_tz, &width, &right_tz, &right_pad); if
(-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad +=
right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad *
unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0]
; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((
uint32_t) (src_image->bits.width) << 16))); max_x = (
(int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1
; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((1 << 1) & (1 << 2)) { mask = mask_line; mask_line
+= mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 =
pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1
+ 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1
= weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD
) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]
; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height
); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height
); src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
0); dst += left_pad; if ((1 << 1) & (1 << 2)
) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((1 << 1) & (1 << 2)
) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] =
src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2
[src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1
, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) {
weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height
) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2
< 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->
bits.height) { weight2 = 0; y2 = src_image->bits.height - 1
; } src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
1); dst += left_pad; if ((1 << 1) & (1 << 2)
) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1
[1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) &
(1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if
(width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((1 << 1) & (1 << 2)
) mask += width; vx += width * unit_x; } if (right_tz > 0)
{ buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) &
(1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1
[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels
; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom
; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0
[64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat (
PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat
(PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top
= src_first_line + src_stride * y1; src_line_bottom = src_first_line
+ src_stride * y2; if (need_src_extension) { for (i=0; i<
src_width;) { for (j=0; j<src_image->bits.width; j++, i
++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1
[i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0
[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] =
src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2
[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom
[0]; width_remain = width; while (width_remain > 0) { repeat
(PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int)
((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed
- vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels >
width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((1 <<
1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL
, &vx, src_width_fixed); } if (((int) ((vx) >> 16))
!= src_width - 1 && width_remain > 0) { num_pixels
= ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) <<
16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels
> width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src_line_top, src_line_bottom, num_pixels, weight1
, weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((1 <<
1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src_first_line + src_stride * y1, src_first_line
+ src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx
, 0); } } }
6151 scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
6152 uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_cover_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1
) & (1 << 1)) { solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); mask_stride =
0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD
|| -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds
(src_image->bits.width, v.vector[0], unit_x, &left_pad
, &left_tz, &width, &right_tz, &right_pad); if
(-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad +=
right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad *
unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0]
; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((
uint32_t) (src_image->bits.width) << 16))); max_x = (
(int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1
; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((1 << 1) & (1 << 2)) { mask = mask_line; mask_line
+= mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 =
pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1
+ 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1
= weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD
) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]
; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height
); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height
); src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
0); dst += left_pad; if ((1 << 1) & (1 << 2)
) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((1 << 1) & (1 << 2)
) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] =
src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2
[src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1
, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) {
weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height
) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2
< 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->
bits.height) { weight2 = 0; y2 = src_image->bits.height - 1
; } src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
1); dst += left_pad; if ((1 << 1) & (1 << 2)
) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1
[1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) &
(1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if
(width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((1 << 1) & (1 << 2)
) mask += width; vx += width * unit_x; } if (right_tz > 0)
{ buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) &
(1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1
[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels
; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom
; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0
[64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat (
PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat
(PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top
= src_first_line + src_stride * y1; src_line_bottom = src_first_line
+ src_stride * y2; if (need_src_extension) { for (i=0; i<
src_width;) { for (j=0; j<src_image->bits.width; j++, i
++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1
[i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0
[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] =
src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2
[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom
[0]; width_remain = width; while (width_remain > 0) { repeat
(PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int)
((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed
- vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels >
width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((1 <<
1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL
, &vx, src_width_fixed); } if (((int) ((vx) >> 16))
!= src_width - 1 && width_remain > 0) { num_pixels
= ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) <<
16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels
> width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src_line_top, src_line_bottom, num_pixels, weight1
, weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((1 <<
1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src_first_line + src_stride * y1, src_first_line
+ src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx
, 0); } } }
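All of the coordinate stepping in the expansion above is done in pixman_fixed_t, a signed 16.16 fixed-point type: the integer pixel index is the value shifted right by 16, and the bilinear weights are kept on a 7-bit scale, which is the (1 << 7) appearing in the weight1/weight2 computation. A minimal standalone sketch of that arithmetic, assuming pixman_fixed_to_bilinear_weight simply keeps the top seven fractional bits:

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;               /* 16.16 fixed point */
    #define PIXMAN_FIXED_1 ((pixman_fixed_t) 1 << 16)

    /* Assumption: mirrors pixman_fixed_to_bilinear_weight, i.e. the
       top 7 fractional bits on the same (1 << 7) scale used above. */
    static int fixed_to_bilinear_weight (pixman_fixed_t f)
    {
        return (f >> (16 - 7)) & ((1 << 7) - 1);
    }

    int main (void)
    {
        pixman_fixed_t vy = (5 << 16) + PIXMAN_FIXED_1 / 4;  /* y = 5.25 */
        int y1      = vy >> 16;                       /* 5, top row      */
        int weight2 = fixed_to_bilinear_weight (vy);  /* 32, i.e. 0.25   */
        int weight1 = (1 << 7) - weight2;             /* 96, i.e. 0.75   */
        printf ("y1=%d weight1=%d weight2=%d\n", y1, weight1, weight2);
        return 0;
    }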
6153 COVER, FLAG_HAVE_SOLID_MASK)
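Each FAST_BILINEAR_MAINLOOP_COMMON invocation instantiates this main loop with its repeat mode baked in as a compile-time constant; COVER expands to -1 (PIXMAN_REPEAT_COVER in pixman-inlines.h), which is why every repeat-mode test in the cover expansion above reads -1 == PIXMAN_REPEAT_PAD and so on, and the compiler discards all but the final else branch. A reduced, hypothetical sketch of that template pattern:

    #include <stdio.h>

    /* Stand-ins for pixman's repeat modes; REPEAT_COVER is the
       out-of-band -1 seen substituted into the expansion above. */
    enum { REPEAT_NONE = 0, REPEAT_NORMAL = 1, REPEAT_PAD = 2 };
    #define REPEAT_COVER (-1)

    /* Hypothetical reduction of the template: the mode is a macro
       argument, so only one branch of the chain survives per function. */
    #define DEFINE_MAINLOOP(name, REPEAT_MODE)                       \
        static void name (void)                                      \
        {                                                            \
            if (REPEAT_MODE == REPEAT_PAD)                           \
                puts (#name ": clamp coordinates to the source");    \
            else if (REPEAT_MODE == REPEAT_NONE)                     \
                puts (#name ": zero-fill outside the source");       \
            else if (REPEAT_MODE == REPEAT_NORMAL)                   \
                puts (#name ": tile the source");                    \
            else                                                     \
                puts (#name ": cover, no edge handling needed");     \
        }

    DEFINE_MAINLOOP (mainloop_cover_OVER, REPEAT_COVER)
    DEFINE_MAINLOOP (mainloop_pad_OVER,   REPEAT_PAD)

    int main (void)
    {
        mainloop_cover_OVER ();
        mainloop_pad_OVER ();
        return 0;
    }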
6154 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
static void fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1
) & (1 << 1)) { solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); mask_stride =
0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits
.width, v.vector[0], unit_x, &left_pad, &left_tz, &
width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD ==
PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz
; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x;
} if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector
[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t)
((uint32_t) (src_image->bits.width) << 16))); max_x
= ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16))
+ 1; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((1 << 1) & (1 << 2)) { mask = mask_line; mask_line
+= mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 =
pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1
+ 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1
= weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1
[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image
->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image
->bits.height); src1 = src_first_line + src_stride * y1; src2
= src_first_line + src_stride * y2; if (left_pad > 0) { buf1
[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
0); dst += left_pad; if ((1 << 1) & (1 << 2)
) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((1 << 1) & (1 << 2)
) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] =
src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2
[src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) {
uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if
(y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->
bits.height) { weight1 = 0; y1 = src_image->bits.height - 1
; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image
->bits.height) { weight2 = 0; y2 = src_image->bits.height
- 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line
+ src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] =
0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0,
1); dst += left_pad; if ((1 << 1) & (1 << 2)
) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1
[1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) &
(1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if
(width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src1, src2, width, weight1, weight2, vx, unit_x,
0, 0); dst += width; if ((1 << 1) & (1 << 2)
) mask += width; vx += width * unit_x; } if (right_tz > 0)
{ buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) &
(1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1
[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0
, 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
{ int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top
; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2
[2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1
[64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image
->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image
->bits.height); src_line_top = src_first_line + src_stride
* y1; src_line_bottom = src_first_line + src_stride * y2; if
(need_src_extension) { for (i=0; i<src_width;) { for (j=0
; j<src_image->bits.width; j++, i++) { extended_src_line0
[i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom
[j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom
= &extended_src_line1[0]; } buf1[0] = src_line_top[src_width
- 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width
- 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while
(width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx
, src_width_fixed); if (((int) ((vx) >> 16)) == src_width
- 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t
) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels
= width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) &
((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t
) 1))), unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((1 <<
1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL
, &vx, src_width_fixed); } if (((int) ((vx) >> 16))
!= src_width - 1 && width_remain > 0) { num_pixels
= ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) <<
16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels
> width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src_line_top, src_line_bottom, num_pixels, weight1
, weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels
; vx += num_pixels * unit_x; dst += num_pixels; if ((1 <<
1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER
(dst, mask, src_first_line + src_stride * y1, src_first_line
+ src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx
, 0); } } }
6155 scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
6156 uint32_t, uint32_t, uint32_t,
6157 PAD, FLAG_HAVE_SOLID_MASK)
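The pad instantiation above clamps y1/y2 with repeat (PIXMAN_REPEAT_PAD, ...) before indexing source rows, and its normal-repeat branch wraps vx modulo the fixed-point source width. A sketch of those two coordinate fixups, assuming they match the repeat() helper in pixman-inlines.h:

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;   /* 16.16 fixed point */

    /* PAD: clamp an integer row/column into [0, size - 1]. */
    static void repeat_pad (int *coord, int size)
    {
        if (*coord < 0)
            *coord = 0;
        else if (*coord >= size)
            *coord = size - 1;
    }

    /* NORMAL: wrap a fixed-point coordinate modulo the source extent
       so tiling never reads outside the source bits. */
    static void repeat_normal (pixman_fixed_t *coord, pixman_fixed_t size)
    {
        *coord %= size;
        if (*coord < 0)
            *coord += size;
    }

    int main (void)
    {
        int y = -3;
        repeat_pad (&y, 10);               /* clamps to row 0 */

        pixman_fixed_t vx = -(1 << 16);    /* -1.0 in 16.16   */
        repeat_normal (&vx, 10 << 16);     /* wraps to 9.0    */

        printf ("y=%d vx=%d.0\n", y, (int) (vx >> 16));
        return 0;
    }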
6158 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
static void fast_composite_scaled_bilinear_sse2_8888_n_8888_none_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1
) & (1 << 1)) { solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); mask_stride =
0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits
.width, v.vector[0], unit_x, &left_pad, &left_tz, &
width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz
; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x;
} if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector
[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t)
((uint32_t) (src_image->bits.width) << 16))); max_x
= ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16))
+ 1; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
;
        dst = dst_line;
        dst_line += dst_stride;
        vx = v.vector[0];
        if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; }
        y1 = (int) (vy >> 16);
        weight2 = pixman_fixed_to_bilinear_weight (vy);
        if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; }
        else         { y2 = y1;     weight1 = weight2 = (1 << 7) / 2; }
        vy += unit_y;
        if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];
            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = src1[0];
                buf2[0] = buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                dst += left_pad;
                if ((1 << 1) & (1 << 2)) mask += left_pad;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((1 << 1) & (1 << 2)) mask += width;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
            }
        }
        else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];
            if (y1 < 0)                       { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0)                       { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((1 << 1) & (1 << 2)) mask += left_pad;
            }
            if (left_tz > 0)
            {
                buf1[0] = 0; buf1[1] = src1[0];
                buf2[0] = 0; buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, left_tz, weight1, weight2,
                     vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1),
                     unit_x, 0, 0);
                dst += left_tz;
                if ((1 << 1) & (1 << 2)) mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((1 << 1) & (1 << 2)) mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0)
            {
                buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, right_tz, weight1, weight2,
                     vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1),
                     unit_x, 0, 0);
                dst += right_tz;
                if ((1 << 1) & (1 << 2)) mask += right_tz;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
            }
        }
        else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL)
        {
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t *src_line_top;
            uint32_t *src_line_bottom;
            uint32_t buf1[2], buf2[2];
            uint32_t extended_src_line0[64 * 2];
            uint32_t extended_src_line1[64 * 2];
            int i, j;
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;
            if (need_src_extension)
            {
                for (i = 0; i < src_width;)
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                    {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }
            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;
            while (width_remain > 0)
            {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if ((int) (vx >> 16) == src_width - 1)
                {
                    num_pixels = (src_width_fixed - vx - (pixman_fixed_t) 1) / unit_x + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                        (dst, mask, buf1, buf2, num_pixels, weight1, weight2,
                         vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1),
                         unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((1 << 1) & (1 << 2)) mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if ((int) (vx >> 16) != src_width - 1 && width_remain > 0)
                {
                    num_pixels = (src_width_fixed - ((pixman_fixed_t) ((uint32_t) 1 << 16))
                                  - vx - (pixman_fixed_t) 1) / unit_x + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                        (dst, mask, src_line_top, src_line_bottom, num_pixels,
                         weight1, weight2, vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((1 << 1) & (1 << 2)) mask += num_pixels;
                }
            }
        }
        else
        {
            scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                (dst, mask, src_first_line + src_stride * y1,
                 src_first_line + src_stride * y2,
                 width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
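The scanline setup at the top of the loop above splits the 16.16 fixed-point coordinate vy into a row index and a pair of vertical weights, with (1 << 7) as the weight range. A minimal standalone sketch of that arithmetic, assuming 16.16 fixed point and 7 interpolation bits as in the expanded code; to_bilinear_weight here is an illustrative stand-in, not pixman's pixman_fixed_to_bilinear_weight itself:

    /* Sketch only: deriving the vertical bilinear weights from a
     * 16.16 fixed-point y coordinate with 7 interpolation bits. */
    #include <stdint.h>
    #include <stdio.h>

    static int to_bilinear_weight (int32_t f)
    {
        return (f >> (16 - 7)) & ((1 << 7) - 1); /* top 7 fraction bits */
    }

    int main (void)
    {
        int32_t vy = (3 << 16) + 0x4000;        /* y = 3.25 in 16.16 */
        int y1 = (int) (vy >> 16);              /* top row: 3 */
        int weight2 = to_bilinear_weight (vy);  /* 32 out of 128 */
        int weight1 = (1 << 7) - weight2;       /* 96 out of 128 */
        printf ("rows %d/%d, weights %d/%d\n", y1, y1 + 1, weight1, weight2);
        return 0;
    }

When the fractional part is zero, the expansion instead sets y2 = y1 and gives both rows the weight (1 << 7) / 2, so the sum of the two weights is always 1 << 7.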
6159 scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
6160 uint32_t, uint32_t, uint32_t,
6161 NONE, FLAG_HAVE_SOLID_MASK)
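In the PAD branches of the expansion above, repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height) clamps the row index into the image, and the left/right pad spans are drawn from a two-entry buffer holding the edge pixel with vx and unit_x both 0, so every horizontal sample lands on that one pixel. The clamp itself amounts to the following sketch (illustrative, not pixman's definition):

    /* Illustrative clamp with the same effect as the PAD repeat used above. */
    #include <stdio.h>

    static int repeat_pad (int v, int size)
    {
        if (v < 0)
            return 0;
        if (v >= size)
            return size - 1;
        return v;
    }

    int main (void)
    {
        /* rows -2 and 9 of an 8-row image both clamp to an edge row */
        printf ("%d %d %d\n", repeat_pad (-2, 8), repeat_pad (3, 8), repeat_pad (9, 8));
        return 0;
    }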
6162 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,

static void
fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp,
                                                             pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t     op         = info->op;
    __attribute__((unused)) pixman_image_t *src_image  = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t         src_x      = info->src_x;
    __attribute__((unused)) int32_t         src_y      = info->src_y;
    __attribute__((unused)) int32_t         mask_x     = info->mask_x;
    __attribute__((unused)) int32_t         mask_y     = info->mask_y;
    __attribute__((unused)) int32_t         dest_x     = info->dest_x;
    __attribute__((unused)) int32_t         dest_y     = info->dest_y;
    __attribute__((unused)) int32_t         width      = info->width;
    __attribute__((unused)) int32_t         height     = info->height;
    uint32_t *dst_line;
    uint32_t *mask_line;
    uint32_t *src_first_line;
    int y1, y2;
    pixman_fixed_t max_vx = 2147483647;
    pixman_vector_t v;
    pixman_fixed_t vx, vy;
    pixman_fixed_t unit_x, unit_y;
    int32_t left_pad, left_tz, right_tz, right_pad;
    uint32_t *dst;
    uint32_t solid_mask;
    const uint32_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    int src_width;
    pixman_fixed_t src_width_fixed;
    int max_x;
    pixman_bool_t need_src_extension;

    do
    {
        uint32_t *__bits__ = dest_image->bits.bits;
        int __stride__ = dest_image->bits.rowstride;
        dst_stride = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        dst_line = (uint32_t *) __bits__ + dst_stride * dest_y + 1 * dest_x;
    } while (0);
    if ((1 << 1) & (1 << 1))
    {
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        mask_stride = 0;
    }
    else if ((1 << 1) & (1 << 2))
    {
        do
        {
            uint32_t *__bits__ = mask_image->bits.bits;
            int __stride__ = mask_image->bits.rowstride;
            mask_stride = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
            mask_line = (uint32_t *) __bits__ + mask_stride * mask_y + 1 * mask_x;
        } while (0);
    }
    do
    {
        uint32_t *__bits__ = src_image->bits.bits;
        int __stride__ = src_image->bits.rowstride;
        src_stride = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        src_first_line = (uint32_t *) __bits__ + src_stride * 0 + 1 * 0;
    } while (0);

    v.vector[0] = (pixman_fixed_t) ((uint32_t) src_x << 16) + ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    v.vector[1] = (pixman_fixed_t) ((uint32_t) src_y << 16) + ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    v.vector[2] = (pixman_fixed_t) ((uint32_t) 1 << 16);
    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;
    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];
    v.vector[0] -= ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    v.vector[1] -= ((pixman_fixed_t) ((uint32_t) 1 << 16)) / 2;
    vy = v.vector[1];

    if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE)
    {
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                 &left_pad, &left_tz, &width,
                                                 &right_tz, &right_pad);
        if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD)
        {
            left_pad += left_tz;
            right_pad += right_tz;
            left_tz = right_tz = 0;
        }
        v.vector[0] += left_pad * unit_x;
    }
    if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL)
    {
        vx = v.vector[0];
        repeat (PIXMAN_REPEAT_NORMAL, &vx,
                (pixman_fixed_t) ((uint32_t) src_image->bits.width << 16));
        max_x = (int) ((vx + (width - 1) * (int64_t) unit_x) >> 16) + 1;
        if (src_image->bits.width < 64)
        {
            src_width = 0;
            while (src_width < 64 && src_width <= max_x)
                src_width += src_image->bits.width;
            need_src_extension = 1;
        }
        else
        {
            src_width = src_image->bits.width;
            need_src_extension = 0;
        }
        src_width_fixed = (pixman_fixed_t) ((uint32_t) src_width << 16);
    }

    while (--height >= 0)
    {
        int weight1, weight2;
        dst = dst_line;
        dst_line += dst_stride;
        vx = v.vector[0];
        if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; }
        y1 = (int) (vy >> 16);
        weight2 = pixman_fixed_to_bilinear_weight (vy);
        if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; }
        else         { y2 = y1;     weight1 = weight2 = (1 << 7) / 2; }
        vy += unit_y;
        if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];
            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = src1[0];
                buf2[0] = buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                dst += left_pad;
                if ((1 << 1) & (1 << 2)) mask += left_pad;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((1 << 1) & (1 << 2)) mask += width;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
            }
        }
        else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE)
        {
            uint32_t *src1, *src2;
            uint32_t buf1[2], buf2[2];
            if (y1 < 0)                       { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0)                       { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;
            if (left_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((1 << 1) & (1 << 2)) mask += left_pad;
            }
            if (left_tz > 0)
            {
                buf1[0] = 0; buf1[1] = src1[0];
                buf2[0] = 0; buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, left_tz, weight1, weight2,
                     vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1),
                     unit_x, 0, 0);
                dst += left_tz;
                if ((1 << 1) & (1 << 2)) mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0)
            {
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((1 << 1) & (1 << 2)) mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0)
            {
                buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, right_tz, weight1, weight2,
                     vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1),
                     unit_x, 0, 0);
                dst += right_tz;
                if ((1 << 1) & (1 << 2)) mask += right_tz;
            }
            if (right_pad > 0)
            {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                    (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
            }
        }
        else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL)
        {
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t *src_line_top;
            uint32_t *src_line_bottom;
            uint32_t buf1[2], buf2[2];
            uint32_t extended_src_line0[64 * 2];
            uint32_t extended_src_line1[64 * 2];
            int i, j;
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;
            if (need_src_extension)
            {
                for (i = 0; i < src_width;)
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                    {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }
            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;
            while (width_remain > 0)
            {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if ((int) (vx >> 16) == src_width - 1)
                {
                    num_pixels = (src_width_fixed - vx - (pixman_fixed_t) 1) / unit_x + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                        (dst, mask, buf1, buf2, num_pixels, weight1, weight2,
                         vx & (((pixman_fixed_t) ((uint32_t) 1 << 16)) - (pixman_fixed_t) 1),
                         unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((1 << 1) & (1 << 2)) mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if ((int) (vx >> 16) != src_width - 1 && width_remain > 0)
                {
                    num_pixels = (src_width_fixed - ((pixman_fixed_t) ((uint32_t) 1 << 16))
                                  - vx - (pixman_fixed_t) 1) / unit_x + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                        (dst, mask, src_line_top, src_line_bottom, num_pixels,
                         weight1, weight2, vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((1 << 1) & (1 << 2)) mask += num_pixels;
                }
            }
        }
        else
        {
            scaled_bilinear_scanline_sse2_8888_n_8888_OVER
                (dst, mask, src_first_line + src_stride * y1,
                 src_first_line + src_stride * y2,
                 width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
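For NORMAL repeat, the main loop above first wraps vx back into [0, src_width_fixed) and then emits pixels in runs: one run while vx sits in the seam column (src_width - 1), interpolating between the last and first source pixels staged in buf1/buf2, and one run across the interior columns. A sketch of the wrap and of the interior run-length bound, assuming 16.16 fixed point and a positive unit_x; repeat_normal is an illustrative stand-in, not the pixman helper itself:

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t fixed_16_16; /* 16.16, as in pixman_fixed_t */

    /* Sketch of the NORMAL-repeat wrap applied to vx above. */
    static void repeat_normal (fixed_16_16 *vx, fixed_16_16 limit)
    {
        while (*vx >= limit) *vx -= limit;
        while (*vx < 0)      *vx += limit;
    }

    int main (void)
    {
        fixed_16_16 width_fixed = 4 << 16;  /* 4-pixel-wide source */
        fixed_16_16 unit_x = 1 << 15;       /* 0.5 source px per dest px */
        fixed_16_16 vx = (9 << 16) + (1 << 14);

        repeat_normal (&vx, width_fixed);   /* 9.25 wraps to 1.25 */
        /* pixels before vx enters the seam column (src_width - 1),
           mirroring the second num_pixels computation in the loop above */
        int num_pixels = (width_fixed - (1 << 16) - vx - 1) / unit_x + 1;
        printf ("vx = %f, interior run = %d pixels\n", vx / 65536.0, num_pixels);
        return 0;
    }

Here the interior run is 4 pixels (vx = 1.25, 1.75, 2.25, 2.75); the fifth step reaches 3.25, which lies in the seam column of the 4-pixel source, so the loop switches to the buf1/buf2 pair for that stretch.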
6163 scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
6164 uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y1, y2; pixman_fixed_t max_vx = (2147483647
); pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x
, unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t
src_width_fixed; int max_x; pixman_bool_t need_src_extension
; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1
) & (1 << 1)) { solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); mask_stride =
0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (
1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ((uint32_t
) (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE
) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits
.width, v.vector[0], unit_x, &left_pad, &left_tz, &
width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz
; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x;
} if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v
.vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16))); max_x
= ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16))
+ 1; if (src_image->bits.width < 64) { src_width = 0; while
(src_width < 64 && src_width <= max_x) src_width
+= src_image->bits.width; need_src_extension = 1; } else {
src_width = src_image->bits.width; need_src_extension = 0
; } src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width
) << 16)); } while (--height >= 0) { int weight1, weight2
; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if
((1 << 1) & (1 << 2)) { mask = mask_line; mask_line
+= mask_stride; } y1 = ((int) ((vy) >> 16)); weight2 =
pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1
+ 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1
= weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1
[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image
->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image
->bits.height);
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;

            if (left_pad > 0) {
                buf1[0] = buf1[1] = src1[0];
                buf2[0] = buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                dst += left_pad;
                if ((1 << 1) & (1 << 2))
                    mask += left_pad;
            }
            if (width > 0) {
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((1 << 1) & (1 << 2))
                    mask += width;
            }
            if (right_pad > 0) {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
            }
        } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) {
            uint32_t *src1, *src2;
            uint32_t buf1[2];
            uint32_t buf2[2];

            if (y1 < 0)                       { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0)                       { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;

            if (left_pad > 0) {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((1 << 1) & (1 << 2))
                    mask += left_pad;
            }
            if (left_tz > 0) {
                buf1[0] = 0;
                buf1[1] = src1[0];
                buf2[0] = 0;
                buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                dst += left_tz;
                if ((1 << 1) & (1 << 2))
                    mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0) {
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((1 << 1) & (1 << 2))
                    mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0) {
                buf1[0] = src1[src_image->bits.width - 1];
                buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1];
                buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                dst += right_tz;
                if ((1 << 1) & (1 << 2))
                    mask += right_tz;
            }
            if (right_pad > 0) {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
            }
        } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) {
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t *src_line_top;
            uint32_t *src_line_bottom;
            uint32_t buf1[2];
            uint32_t buf2[2];
            uint32_t extended_src_line0[64 * 2];
            uint32_t extended_src_line1[64 * 2];
            int i, j;

            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;

            if (need_src_extension) {
                for (i = 0; i < src_width;) {
                    for (j = 0; j < src_image->bits.width; j++, i++) {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }

            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;

            while (width_remain > 0) {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if (((int) ((vx) >> 16)) == src_width - 1) {
                    num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((1 << 1) & (1 << 2))
                        mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) {
                    num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((1 << 1) & (1 << 2))
                        mask += num_pixels;
                }
            }
        } else {
            scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
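Note on the guards above: every occurrence of `if ((1 << 1) & (1 << 2)) mask += ...` is the constant-folded remnant of a flags test in the generating macro. This instantiation was produced with FLAG_HAVE_SOLID_MASK (see line 6165 below), so the `(1 << 2)` bit is never present in the flags word, the condition is the constant 0, and the mask pointer is never advanced. A minimal standalone sketch of that constant folding (FLAG_HAVE_NON_SOLID_MASK is an assumed name for the `(1 << 2)` bit; it does not appear in this report):

/* Standalone sketch: why "(1 << 1) & (1 << 2)" is always false here.
 * FLAG_HAVE_SOLID_MASK matches the invocation at line 6165;
 * FLAG_HAVE_NON_SOLID_MASK is an assumed name for the (1 << 2) bit. */
#include <assert.h>

#define FLAG_HAVE_SOLID_MASK     (1 << 1)
#define FLAG_HAVE_NON_SOLID_MASK (1 << 2)   /* assumed name */

int
main (void)
{
    int flags = FLAG_HAVE_SOLID_MASK;

    /* Expanded guard "flags & FLAG_HAVE_NON_SOLID_MASK": a solid mask
     * never sets the non-solid bit, so every "mask += ..." advance
     * guarded by it is dead code the compiler can discard. */
    assert ((flags & FLAG_HAVE_NON_SOLID_MASK) == 0);
    return 0;
}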
6165                                   NORMAL, FLAG_HAVE_SOLID_MASK)

static void
fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp,
                                                             pixman_composite_info_t *info)
{
    __attribute__((unused)) pixman_op_t op = info->op;
    __attribute__((unused)) pixman_image_t *src_image = info->src_image;
    __attribute__((unused)) pixman_image_t *mask_image = info->mask_image;
    __attribute__((unused)) pixman_image_t *dest_image = info->dest_image;
    __attribute__((unused)) int32_t src_x = info->src_x;
    __attribute__((unused)) int32_t src_y = info->src_y;
    __attribute__((unused)) int32_t mask_x = info->mask_x;
    __attribute__((unused)) int32_t mask_y = info->mask_y;
    __attribute__((unused)) int32_t dest_x = info->dest_x;
    __attribute__((unused)) int32_t dest_y = info->dest_y;
    __attribute__((unused)) int32_t width = info->width;
    __attribute__((unused)) int32_t height = info->height;
    uint32_t *dst_line;
    uint32_t *mask_line;
    uint32_t *src_first_line;
    int y1, y2;
    pixman_fixed_t max_vx = (2147483647);
    pixman_vector_t v;
    pixman_fixed_t vx, vy;
    pixman_fixed_t unit_x, unit_y;
    int32_t left_pad, left_tz, right_tz, right_pad;
    uint32_t *dst;
    uint32_t solid_mask;
    const uint32_t *mask = &solid_mask;
    int src_stride, mask_stride, dst_stride;
    int src_width;
    pixman_fixed_t src_width_fixed;
    int max_x;
    pixman_bool_t need_src_extension;

    do {
        uint32_t *__bits__;
        int __stride__;
        __bits__ = dest_image->bits.bits;
        __stride__ = dest_image->bits.rowstride;
        (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x);
    } while (0);

    if ((1 << 1) & (1 << 1)) {
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
        mask_stride = 0;
    } else if ((1 << 1) & (1 << 2)) {
        do {
            uint32_t *__bits__;
            int __stride__;
            __bits__ = mask_image->bits.bits;
            __stride__ = mask_image->bits.rowstride;
            (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
            (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x);
        } while (0);
    }

    do {
        uint32_t *__bits__;
        int __stride__;
        __bits__ = src_image->bits.bits;
        __stride__ = src_image->bits.rowstride;
        (src_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t);
        (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
    } while (0);

    v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) << 16)));

    if (!_moz_pixman_transform_point_3d (src_image->common.transform, &v))
        return;

    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];

    v.vector[0] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;
    v.vector[1] -= (((pixman_fixed_t) ((uint32_t) (1) << 16))) / 2;

    vy = v.vector[1];

    if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) {
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,
                                                 &left_pad, &left_tz, &width, &right_tz, &right_pad);
        if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) {
            left_pad += left_tz;
            right_pad += right_tz;
            left_tz = right_tz = 0;
        }
        v.vector[0] += left_pad * unit_x;
    }

    if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) {
        vx = v.vector[0];
        repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((uint32_t) (src_image->bits.width) << 16)));
        max_x = ((int) ((vx + (width - 1) * (int64_t) unit_x) >> 16)) + 1;

        if (src_image->bits.width < 64) {
            src_width = 0;
            while (src_width < 64 && src_width <= max_x)
                src_width += src_image->bits.width;
            need_src_extension = 1;
        } else {
            src_width = src_image->bits.width;
            need_src_extension = 0;
        }

        src_width_fixed = ((pixman_fixed_t) ((uint32_t) (src_width) << 16));
    }

    while (--height >= 0) {
        int weight1, weight2;

        dst = dst_line;
        dst_line += dst_stride;
        vx = v.vector[0];
        if ((1 << 1) & (1 << 2)) {
            mask = mask_line;
            mask_line += mask_stride;
        }

        y1 = ((int) ((vy) >> 16));
        weight2 = pixman_fixed_to_bilinear_weight (vy);
        if (weight2) {
            y2 = y1 + 1;
            weight1 = (1 << 7) - weight2;
        } else {
            y2 = y1;
            weight1 = weight2 = (1 << 7) / 2;
        }
        vy += unit_y;

        if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) {
            uint32_t *src1, *src2;
            uint32_t buf1[2];
            uint32_t buf2[2];

            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;

            if (left_pad > 0) {
                buf1[0] = buf1[1] = src1[0];
                buf2[0] = buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0);
                dst += left_pad;
                if ((1 << 1) & (1 << 2))
                    mask += left_pad;
            }
            if (width > 0) {
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((1 << 1) & (1 << 2))
                    mask += width;
            }
            if (right_pad > 0) {
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1];
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0);
            }
        } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) {
            uint32_t *src1, *src2;
            uint32_t buf1[2];
            uint32_t buf2[2];

            if (y1 < 0)                       { weight1 = 0; y1 = 0; }
            if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; }
            if (y2 < 0)                       { weight2 = 0; y2 = 0; }
            if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; }
            src1 = src_first_line + src_stride * y1;
            src2 = src_first_line + src_stride * y2;

            if (left_pad > 0) {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1);
                dst += left_pad;
                if ((1 << 1) & (1 << 2))
                    mask += left_pad;
            }
            if (left_tz > 0) {
                buf1[0] = 0;
                buf1[1] = src1[0];
                buf2[0] = 0;
                buf2[1] = src2[0];
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                dst += left_tz;
                if ((1 << 1) & (1 << 2))
                    mask += left_tz;
                vx += left_tz * unit_x;
            }
            if (width > 0) {
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0);
                dst += width;
                if ((1 << 1) & (1 << 2))
                    mask += width;
                vx += width * unit_x;
            }
            if (right_tz > 0) {
                buf1[0] = src1[src_image->bits.width - 1];
                buf1[1] = 0;
                buf2[0] = src2[src_image->bits.width - 1];
                buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, 0, 0);
                dst += right_tz;
                if ((1 << 1) & (1 << 2))
                    mask += right_tz;
            }
            if (right_pad > 0) {
                buf1[0] = buf1[1] = 0;
                buf2[0] = buf2[1] = 0;
                scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 1);
            }
        } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) {
            int32_t num_pixels;
            int32_t width_remain;
            uint32_t *src_line_top;
            uint32_t *src_line_bottom;
            uint32_t buf1[2];
            uint32_t buf2[2];
            uint32_t extended_src_line0[64 * 2];
            uint32_t extended_src_line1[64 * 2];
            int i, j;

            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);
            src_line_top = src_first_line + src_stride * y1;
            src_line_bottom = src_first_line + src_stride * y2;

            if (need_src_extension) {
                for (i = 0; i < src_width;) {
                    for (j = 0; j < src_image->bits.width; j++, i++) {
                        extended_src_line0[i] = src_line_top[j];
                        extended_src_line1[i] = src_line_bottom[j];
                    }
                }
                src_line_top = &extended_src_line0[0];
                src_line_bottom = &extended_src_line1[0];
            }

            buf1[0] = src_line_top[src_width - 1];
            buf1[1] = src_line_top[0];
            buf2[0] = src_line_bottom[src_width - 1];
            buf2[1] = src_line_bottom[0];
            width_remain = width;

            while (width_remain > 0) {
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                if (((int) ((vx) >> 16)) == src_width - 1) {
                    num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((uint32_t) (1) << 16))) - ((pixman_fixed_t) 1))), unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((1 << 1) & (1 << 2))
                        mask += num_pixels;
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);
                }
                if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) {
                    num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((uint32_t) (1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1;
                    if (num_pixels > width_remain)
                        num_pixels = width_remain;
                    scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, 0);
                    width_remain -= num_pixels;
                    vx += num_pixels * unit_x;
                    dst += num_pixels;
                    if ((1 << 1) & (1 << 2))
                        mask += num_pixels;
                }
            }
        } else {
            scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx, 0);
        }
    }
}
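The per-scanline setup above works in pixman's 16.16 fixed-point: `y1` is the integer part of `vy`, and the two vertical weights are taken on a 7-bit scale so that `weight1 + weight2 == (1 << 7)`. A minimal standalone sketch of that arithmetic, assuming the 7-bit truncating weight extraction implied by the `(1 << 7)` constants in the listing (illustrative, not pixman's exact inline helper):

/* Standalone sketch of the 16.16 fixed-point vertical setup (not part
 * of pixman).  A coordinate is a 32-bit value with 16 fractional bits;
 * the bilinear weight keeps the top 7 fractional bits, so the two row
 * weights always sum to 128 (1 << 7). */
#include <stdint.h>
#include <stdio.h>

typedef int32_t fixed_16_16_t;

#define INT_TO_FIXED(i)  ((fixed_16_16_t) ((uint32_t) (i) << 16))
#define FIXED_TO_INT(f)  ((int) ((f) >> 16))

/* Truncating 7-bit weight, mirroring the constants in the listing;
 * pixman_fixed_to_bilinear_weight may differ in detail. */
static int
bilinear_weight (fixed_16_16_t x)
{
    return (x >> (16 - 7)) & ((1 << 7) - 1);
}

int
main (void)
{
    fixed_16_16_t vy = INT_TO_FIXED (3) + 0x8000;   /* y = 3.5 */
    int y1 = FIXED_TO_INT (vy);                     /* top row: 3 */
    int weight2 = bilinear_weight (vy);             /* bottom row weight */
    int weight1 = (1 << 7) - weight2;               /* top row weight */

    /* Prints "y1=3 weight1=64 weight2=64": halfway between rows 3 and 4. */
    printf ("y1=%d weight1=%d weight2=%d\n", y1, weight1, weight2);
    return 0;
}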
6166
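The table that follows registers one candidate per (operator, source format, mask format, destination format) combination; at composite time such tables are scanned in order and the first entry whose operator, formats, and required flags all match is used, which is why more specific entries are listed before more general ones. A hedged sketch of that first-match dispatch, using simplified hypothetical types rather than pixman's real pixman_fast_path_t and matcher:

/* Illustrative first-match dispatch over a fast-path table (simplified,
 * hypothetical types; pixman's real pixman_fast_path_t also carries
 * per-operand flag requirements encoded like the bitmasks above). */
#include <stddef.h>

typedef struct
{
    int op;
    int src_format,  src_flags;
    int mask_format, mask_flags;
    int dest_format, dest_flags;
    void (*func) (void);
} fast_path_t;

static const fast_path_t *
lookup_fast_path (const fast_path_t *paths, size_t n_paths,
                  int op, int src_format, int mask_format, int dest_format,
                  int src_flags, int mask_flags, int dest_flags)
{
    for (size_t i = 0; i < n_paths; i++)
    {
        const fast_path_t *p = &paths[i];

        /* Formats must match exactly; every required flag must be set. */
        if (p->op == op &&
            p->src_format == src_format &&
            p->mask_format == mask_format &&
            p->dest_format == dest_format &&
            (src_flags & p->src_flags) == p->src_flags &&
            (mask_flags & p->mask_flags) == p->mask_flags &&
            (dest_flags & p->dest_flags) == p->dest_flags)
            return p;   /* first (most specific) match wins */
    }
    return NULL;        /* caller falls back to a general path */
}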
6167static const pixman_fast_path_t sse2_fast_paths[] =
6168{
6169 /* PIXMAN_OP_OVER */
6170 PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0)
<< 24) | ((1) << 16) | ((0) << 12) | ((0) <<
8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5, (
(1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_0565
}
,
6171 PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0)
<< 24) | ((1) << 16) | ((0) << 12) | ((0) <<
8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_b5g6r5, (
(1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_0565
}
,
6172 PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 <<
0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) |
(1 << 1) | (1 << 6)), sse2_composite_over_n_8888
}
,
6173 PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 <<
0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) |
(1 << 1) | (1 << 6)), sse2_composite_over_n_8888
}
,
6174 PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 <<
0)))) | (1 << 9))), PIXMAN_r5g6b5, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_over_n_0565 }
,
6175 PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, sse2_composite_over_n_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 <<
0)))) | (1 << 9))), PIXMAN_b5g6r5, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_over_n_0565 }
,
6176 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888
}
,
6177 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888
}
,
6178 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888
}
,
6179 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888
}
,
6180 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5, (
(1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_0565
}
,
6181 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_b5g6r5, (
(1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_0565
}
,
6182 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0)
<< 24) | ((1) << 16) | ((0) << 12) | ((0) <<
8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888
}
,
6183 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0)
<< 24) | ((1) << 16) | ((0) << 12) | ((0) <<
8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888
}
,
6184 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0)
<< 24) | ((1) << 16) | ((0) << 12) | ((0) <<
8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888
}
,
6185 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0)
<< 24) | ((1) << 16) | ((0) << 12) | ((0) <<
8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888
}
,
6186 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), PIXMAN_a8r8g8b8, (
(PIXMAN_a8r8g8b8 == (((0) << 24) | ((0) << 16) | (
(0) << 12) | ((0) << 8) | ((0) << 4) | ((0)
))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1)
| (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) |
((1) << 16) | ((0) << 12) | ((0) << 8) | (
(0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888_8888
}
,
6187 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8
== (((0) << 24) | ((0) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1
<< 2) | (1 << 5) | (1 << 1) | (1 << 6
)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | (
(0) << 12) | ((0) << 8) | ((0) << 4) | ((0)
))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)
))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1
<< 1) | (1 << 6)), sse2_composite_over_8888_8_8888
}
,
6188 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8
== (((0) << 24) | ((0) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1
<< 2) | (1 << 5) | (1 << 1) | (1 << 6
)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | (
(0) << 12) | ((0) << 8) | ((0) << 4) | ((0)
))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)
))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1
<< 1) | (1 << 6)), sse2_composite_over_8888_8_8888
}
,
6189 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8
== (((0) << 24) | ((0) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1
<< 2) | (1 << 5) | (1 << 1) | (1 << 6
)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | (
(0) << 12) | ((0) << 8) | ((0) << 4) | ((0)
))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)
))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1
<< 1) | (1 << 6)), sse2_composite_over_8888_8_8888
}
,
6190 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8
== (((0) << 24) | ((0) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1
<< 2) | (1 << 5) | (1 << 1) | (1 << 6
)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | (
(0) << 12) | ((0) << 8) | ((0) << 4) | ((0)
))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)
))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1
<< 1) | (1 << 6)), sse2_composite_over_8888_8_8888
}
,
6191 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8
== (((0) << 24) | ((0) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1
<< 2) | (1 << 5) | (1 << 1) | (1 << 6
)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | (
(0) << 12) | ((0) << 8) | ((0) << 4) | ((0)
))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)
))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1
<< 1) | (1 << 6)), sse2_composite_over_x888_8_8888
}
,
6192 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8
== (((0) << 24) | ((0) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1
<< 2) | (1 << 5) | (1 << 1) | (1 << 6
)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | (
(0) << 12) | ((0) << 8) | ((0) << 4) | ((0)
))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)
))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1
<< 1) | (1 << 6)), sse2_composite_over_x888_8_8888
}
,
6193 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8
== (((0) << 24) | ((0) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1
<< 2) | (1 << 5) | (1 << 1) | (1 << 6
)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | (
(0) << 12) | ((0) << 8) | ((0) << 4) | ((0)
))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)
))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1
<< 1) | (1 << 6)), sse2_composite_over_x888_8_8888
}
,
6194 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8
== (((0) << 24) | ((0) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1
<< 2) | (1 << 5) | (1 << 1) | (1 << 6
)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | (
(0) << 12) | ((0) << 8) | ((0) << 4) | ((0)
))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)
))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1
<< 1) | (1 << 6)), sse2_composite_over_x888_8_8888
}
,
6195 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_n_8888
}
,
6196 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_n_8888
}
,
6197 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_n_8888
}
,
6198 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_n_8888
}
,
6199 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_n_8888
}
,
6200 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_n_8888
}
,
6201 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_n_8888
}
,
6202 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_n_8888
}
,
6203 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((0) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 <<
2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8
== (((0) << 24) | ((1) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))) | (1 << 8)))
, PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 <<
6)), sse2_composite_over_n_8888_8888_ca }
,
6204 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((0) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 <<
2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8
== (((0) << 24) | ((1) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))) | (1 << 8)))
, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 <<
6)), sse2_composite_over_n_8888_8888_ca }
,
6205 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8b8g8r8, ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((0) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 <<
2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8
== (((0) << 24) | ((1) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))) | (1 << 8)))
, PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 <<
6)), sse2_composite_over_n_8888_8888_ca }
,
6206 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8b8g8r8, ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((0) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 <<
2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8
== (((0) << 24) | ((1) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))) | (1 << 8)))
, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 <<
6)), sse2_composite_over_n_8888_8888_ca }
,
6207 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((0) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 <<
2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8
== (((0) << 24) | ((1) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))) | (1 << 8)))
, PIXMAN_r5g6b5, ((1 << 5) | (1 << 1) | (1 <<
6)), sse2_composite_over_n_8888_0565_ca }
,
6208 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), PIXMAN_a8b8g8r8, ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((0) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 <<
2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8
== (((0) << 24) | ((1) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))) | (1 << 8)))
, PIXMAN_b5g6r5, ((1 << 5) | (1 << 1) | (1 <<
6)), sse2_composite_over_n_8888_0565_ca }
,
6209 PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((2) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), (((0) << 24) | ((2) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((2) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 <<
0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) |
(1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888
}
,
6210 PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((2) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), (((0) << 24) | ((2) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((2) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 <<
0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) |
(1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888
}
,
6211 PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((3) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), (((0) << 24) | ((3) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((3) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 <<
0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) |
(1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888
}
,
6212 PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((3) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), (((0) << 24) | ((3) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((3) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 <<
0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) |
(1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888
}
,
6213 PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((2) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), (((0) << 24) | ((2) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((2) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 <<
0)))) | (1 << 9))), PIXMAN_r5g6b5, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_0565
}
,
6214 PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((3) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))), (((0) << 24) | ((3) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((3) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 <<
0)))) | (1 << 9))), PIXMAN_b5g6r5, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_0565
}
,
6215 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area
}
,
6216 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
23) | (1 << 11) | (1 << 0)))), (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))), (((((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area
}
,
6217
6218 /* PIXMAN_OP_OVER_REVERSE */
6219 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888){ PIXMAN_OP_OVER_REVERSE, (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((1 << 2) | (1 << 5) | (1 << 1) |
(1 << 6)) | (((((0) << 24) | ((1) << 16) |
((0) << 12) | ((0) << 8) | ((0) << 4) | ((
0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 23) | (1 << 11) | (1 << 0)))), (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_reverse_n_8888
}
,
6220 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888){ PIXMAN_OP_OVER_REVERSE, (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((1 << 2) | (1 << 5) | (1 << 1) |
(1 << 6)) | (((((0) << 24) | ((1) << 16) |
((0) << 12) | ((0) << 8) | ((0) << 4) | ((
0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 23) | (1 << 11) | (1 << 0)))), (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 <<
11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_reverse_n_8888
}
,
6221
6222 /* PIXMAN_OP_ADD */
6223     PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
6224     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8),
6225     PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
6226     PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
6227     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
6228     PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
6229     PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888),
6230     PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888),
6231     PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888),
6232     PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888),
6233     PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888),
6234     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888),
6235     PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888),
6236     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888),
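All of these table macros build pixman_format_code_t values with one packed encoding, which is what the analyzer rendered as the long shift-and-or constants in its macro expansions. A minimal sketch of that encoding (names follow pixman's headers; offered as an inference from the expanded constants, not a quote of the source):

/* Packed pixman_format_code_t: bits-per-pixel, format type, then the
 * alpha/red/green/blue channel widths, four bits each. */
#define PIXMAN_FORMAT(bpp, type, a, r, g, b)        \
    (((bpp) << 24) | ((type) << 16) | ((a) << 12) | \
     ((r) << 8) | ((g) << 4) | ((b)))

/* The two pseudo-formats that recur throughout this table: */
#define PIXMAN_null  PIXMAN_FORMAT (0, 0, 0, 0, 0, 0) /* "no image": a missing mask  */
#define PIXMAN_solid PIXMAN_FORMAT (0, 1, 0, 0, 0, 0) /* solid color: the "n" in the
                                                         sse2_composite_*_n_* names  */

A "solid" argument therefore expands to ((0) << 24) | ((1) << 16) | ... and a "null" argument to the all-zero constant.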
6237
6238 /* PIXMAN_OP_SRC */
6239     PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
6240     PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888),
6241     PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888),
6242     PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888),
6243     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565),
6244     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565),
6245     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565),
6246     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565),
6247     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888),
6248     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888),
6249     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area),
6250     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area),
6251     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
6252     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
6253     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
6254     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
6255     PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area),
6256     PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area),
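Each PIXMAN_STD_FAST_PATH invocation expands to one pixman_fast_path_t initializer of the shape { op, src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags, func }. As a worked example, entry 6249 folds down to the row below once the format constants and the (format == PIXMAN_solid / PIXMAN_null) ternaries in its expansion are evaluated; the flag-bit comments reflect how the expansion groups the bits, not named pixman constants:

{
    PIXMAN_OP_SRC,
    PIXMAN_a8r8g8b8,                             /* source format */
    ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6))  /* common source flags */
        | ((1 << 23) | (1 << 11) | (1 << 0)),    /* extra bits, added because
                                                    a8r8g8b8 != PIXMAN_solid */
    PIXMAN_null, 0,                              /* no mask, so no mask flags */
    PIXMAN_a8r8g8b8,                             /* destination format */
    (1 << 5) | (1 << 1) | (1 << 6),              /* destination flags */
    sse2_composite_copy_area
},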
6257
6258 /* PIXMAN_OP_IN */
6259     PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8),
6260     PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
6261     PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
6262
6263     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
6264     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
6265     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
6266     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
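Each SIMPLE_NEAREST_FAST_PATH invocation expands to four pixman_fast_path_t rows, one per repeat mode, bound to the matching _cover_/_none_/_pad_/_normal_ variant of the scaled-nearest worker. A sketch of the bundling, assuming per-mode sub-macros named after those suffixes (the _COVER/_PAD/_NORMAL forms do appear individually for the bilinear paths further down):

#define SIMPLE_NEAREST_FAST_PATH(op, s, d, func)      \
    SIMPLE_NEAREST_FAST_PATH_COVER  (op, s, d, func), \
    SIMPLE_NEAREST_FAST_PATH_NONE   (op, s, d, func), \
    SIMPLE_NEAREST_FAST_PATH_PAD    (op, s, d, func), \
    SIMPLE_NEAREST_FAST_PATH_NORMAL (op, s, d, func)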
6267
6268     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
6269     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
6270     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
6271     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
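The _SOLID_MASK_ variants differ from the plain nearest paths only in the mask slot: instead of PIXMAN_null with zero flags, the mask is the PIXMAN_solid pseudo-format carrying the common mask-flag set plus (1 << 9). Reformatted from its expansion, the cover-repeat row generated by entry 6268 reads:

{
    PIXMAN_OP_OVER,
    PIXMAN_a8r8g8b8,
    ((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1 << 23),
    PIXMAN_solid,                                   /* solid ("n") mask */
    ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (1 << 9),
    PIXMAN_a8r8g8b8,
    (1 << 5) | (1 << 1) | (1 << 6),
    fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER
},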
6272
6273     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
6274     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
6275     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
6276     SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
6277     SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
6278     SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888),
6279
6280     SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888),
6281     SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888),
6282     SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888),
6283     SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888),
6284     SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888),
6285     SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888),
6286
6287 SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
6288 SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
6289 SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
6290 SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
6291
6292 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
6293 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
6294 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
6295 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
6296
6297 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
6298 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
6299 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
6300 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888),
6301
6302 { PIXMAN_OP_NONE },
6303};
6304
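Note: each SIMPLE_BILINEAR_* invocation above expands to one fast-path table entry per repeat mode (cover / none / pad / normal) of the form {op, source format, source flags, mask format, mask flags, destination format, destination flags, composite function}. A minimal sketch of how such a table is matched, assuming that layout; the simplified types and the matches() helper below are hypothetical illustrations, not pixman API:

#include <stdint.h>

typedef struct
{
    int      op;
    uint32_t src_format,  src_flags;
    uint32_t mask_format, mask_flags;
    uint32_t dest_format, dest_flags;
    void   (*func) (void);
} fast_path_sketch_t;

/* An entry matches when the operator and formats agree and the images
 * provide at least every flag bit the entry requires. */
static int
matches (const fast_path_sketch_t *p, int op,
         uint32_t sf, uint32_t sflags,
         uint32_t mf, uint32_t mflags,
         uint32_t df, uint32_t dflags)
{
    return p->op == op &&
           p->src_format  == sf && (sflags & p->src_flags)  == p->src_flags &&
           p->mask_format == mf && (mflags & p->mask_flags) == p->mask_flags &&
           p->dest_format == df && (dflags & p->dest_flags) == p->dest_flags;
}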
6305static uint32_t *
6306sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
6307{
6308 int w = iter->width;
6309 __m128i ff000000 = mask_ff000000;
6310 uint32_t *dst = iter->buffer;
6311 uint32_t *src = (uint32_t *)iter->bits;
6312
6313 iter->bits += iter->stride;
6314
6315 while (w && ((uintptr_t)dst) & 0x0f)
6316 {
6317 *dst++ = (*src++) | 0xff000000;
6318 w--;
6319 }
6320
6321 while (w >= 4)
6322 {
6323 save_128_aligned (
6324 (__m128i *)dst, _mm_or_si128 (
6325 load_128_unaligned ((__m128i *)src), ff000000));
6326
6327 dst += 4;
6328 src += 4;
6329 w -= 4;
6330 }
6331
6332 while (w)
6333 {
6334 *dst++ = (*src++) | 0xff000000;
6335 w--;
6336 }
6337
6338 return iter->buffer;
6339}
6340
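Note: sse2_fetch_x8r8g8b8 above, and the two fetchers below, follow the usual head/body/tail shape: copy pixels one at a time until dst reaches 16-byte alignment, run the aligned SSE2 loop, then finish the remainder scalar. The alignment test only inspects the low four address bits; a standalone sketch (helper name is illustrative):

#include <stdint.h>

/* dst is 16-byte aligned exactly when the low four address bits are zero */
static int
is_16_byte_aligned (const void *p)
{
    return (((uintptr_t) p) & 0x0f) == 0;
}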
6341static uint32_t *
6342sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
6343{
6344 int w = iter->width;
6345 uint32_t *dst = iter->buffer;
6346 uint16_t *src = (uint16_t *)iter->bits;
6347 __m128i ff000000 = mask_ff000000;
6348
6349 iter->bits += iter->stride;
6350
6351 while (w && ((uintptr_t)dst) & 0x0f)
6352 {
6353 uint16_t s = *src++;
6354
6355 *dst++ = convert_0565_to_8888 (s);
6356 w--;
6357 }
6358
6359 while (w >= 8)
6360 {
6361 __m128i lo, hi, s;
6362
6363 s = _mm_loadu_si128 ((__m128i *)src);
6364
6365 lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ()));
6366 hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ()));
6367
6368 save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000));
6369 save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000));
6370
6371 dst += 8;
6372 src += 8;
6373 w -= 8;
6374 }
6375
6376 while (w)
6377 {
6378 uint16_t s = *src++;
6379
6380 *dst++ = convert_0565_to_8888 (s);
6381 w--;
6382 }
6383
6384 return iter->buffer;
6385}
6386
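Note: a scalar sketch of the r5g6b5 -> a8r8g8b8 widening performed above by convert_0565_to_8888 and unpack_565_to_8888 (the function name below is hypothetical; pixman's own converter is defined elsewhere in the tree). Each channel replicates its high bits into the low bits so that full-scale values map to 0xff exactly:

#include <stdint.h>

static uint32_t
convert_0565_to_8888_sketch (uint16_t s)
{
    uint32_t r = (s >> 11) & 0x1f;
    uint32_t g = (s >> 5)  & 0x3f;
    uint32_t b = s & 0x1f;

    r = (r << 3) | (r >> 2);   /* 5 -> 8 bits */
    g = (g << 2) | (g >> 4);   /* 6 -> 8 bits */
    b = (b << 3) | (b >> 2);   /* 5 -> 8 bits */

    return 0xff000000 | (r << 16) | (g << 8) | b;
}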
6387static uint32_t *
6388sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
6389{
6390 int w = iter->width;
6391 uint32_t *dst = iter->buffer;
6392 uint8_t *src = iter->bits;
6393 __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
6394
6395 iter->bits += iter->stride;
6396
6397 while (w && (((uintptr_t)dst) & 15))
6398 {
6399 *dst++ = (uint32_t)(*(src++)) << 24;
6400 w--;
6401 }
6402
6403 while (w >= 16)
6404 {
6405 xmm0 = _mm_loadu_si128((__m128i *)src);
6406
6407 xmm1 = _mm_unpacklo_epi8 (_mm_setzero_si128(), xmm0);
6408 xmm2 = _mm_unpackhi_epi8 (_mm_setzero_si128(), xmm0);
6409 xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1);
6410 xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1);
6411 xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2);
6412 xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2);
6413
6414 _mm_store_si128(((__m128i *)(dst + 0)), xmm3);
6415 _mm_store_si128(((__m128i *)(dst + 4)), xmm4);
6416 _mm_store_si128(((__m128i *)(dst + 8)), xmm5);
6417 _mm_store_si128(((__m128i *)(dst + 12)), xmm6);
6418
6419 dst += 16;
6420 src += 16;
6421 w -= 16;
6422 }
6423
6424 while (w)
6425 {
6426 *dst++ = (uint32_t)(*(src++)) << 24;
6427 w--;
6428 }
6429
6430 return iter->buffer;
6431}
6432
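Note: the two-level unpack ladder in sse2_fetch_a8 interleaves zero bytes below each alpha byte, which lands every source byte in bits 24-31 of a 32-bit lane, i.e. the same result as the scalar a << 24 in the head and tail loops. A small standalone check (assumes little-endian x86, which SSE2 implies):

#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>

int main (void)
{
    uint8_t  a[16];
    uint32_t out[4];
    int      i;

    for (i = 0; i < 16; i++)
        a[i] = (uint8_t) (i + 1);

    __m128i x  = _mm_loadu_si128 ((const __m128i *) a);
    __m128i z  = _mm_setzero_si128 ();
    __m128i lo = _mm_unpacklo_epi8 (z, x);    /* bytes -> high half of each u16 */
    __m128i d0 = _mm_unpacklo_epi16 (z, lo);  /* ... -> top byte of each u32    */

    _mm_storeu_si128 ((__m128i *) out, d0);
    for (i = 0; i < 4; i++)
        printf ("0x%08x\n", out[i]);          /* 0x01000000, 0x02000000, ... */
    return 0;
}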
6433#define IMAGE_FLAGS \
6434 (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
6435 FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
6436
6437static const pixman_iter_info_t sse2_iters[] =
6438{
6439 { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
6440 _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL
6441 },
6442 { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
6443 _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL
6444 },
6445 { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
6446 _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL
6447 },
6448 { PIXMAN_null },
6449};
6450
6451#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
6452__attribute__((__force_align_arg_pointer__))
6453#endif
6454pixman_implementation_t *
6455_pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
6456{
6457 pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths);
6458
6459 /* SSE2 constants */
6460 mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000);
6461 mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000);
6462 mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0);
6463 mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f);
6464 mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000);
6465 mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00);
6466 mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8);
6467 mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0);
6468 mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000);
6469 mask_0080 = create_mask_16_128 (0x0080);
6470 mask_00ff = create_mask_16_128 (0x00ff);
6471 mask_0101 = create_mask_16_128 (0x0101);
6472 mask_ffff = create_mask_16_128 (0xffff);
6473 mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
6474 mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
6475 mask_565_rb = create_mask_2x32_128 (0x00f800f8, 0x00f800f8);
6476 mask_565_pack_multiplier = create_mask_2x32_128 (0x20000004, 0x20000004);
6477
6478 /* Set up function pointers */
6479 imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
6480 imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
6481 imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
6482 imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u;
6483 imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u;
6484 imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u;
6485 imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u;
6486 imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u;
6487 imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u;
6488 imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
6489
6490 imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
6491
6492 imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
6493 imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;
6494 imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca;
6495 imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca;
6496 imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca;
6497 imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca;
6498 imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca;
6499 imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca;
6500 imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca;
6501 imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca;
6502 imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca;
6503
6504 imp->blt = sse2_blt;
6505 imp->fill = sse2_fill;
6506
6507 imp->iter_info = sse2_iters;
6508
6509 return imp;
6510}
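Note: create_mask_2x32_128 and create_mask_16_128 are defined earlier in pixman-sse2.c. Their likely shape, shown here as a hedged sketch under renamed helpers, is plain lane replication:

#include <emmintrin.h>
#include <stdint.h>

/* sketch: replicate two 32-bit words across the four lanes */
static __m128i
create_mask_2x32_128_sketch (uint32_t mask0, uint32_t mask1)
{
    return _mm_set_epi32 (mask0, mask1, mask0, mask1);
}

/* sketch: broadcast one 16-bit word to all eight lanes */
static __m128i
create_mask_16_128_sketch (uint16_t mask)
{
    return _mm_set1_epi16 ((short) mask);
}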

/usr/lib/llvm-20/lib/clang/20/include/emmintrin.h

1/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __EMMINTRIN_H
11#define __EMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <xmmintrin.h>
18
19typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
20typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
21
22typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
23typedef long long __m128i_u
24 __attribute__((__vector_size__(16), __aligned__(1)));
25
26/* Type defines. */
27typedef double __v2df __attribute__((__vector_size__(16)));
28typedef long long __v2di __attribute__((__vector_size__(16)));
29typedef short __v8hi __attribute__((__vector_size__(16)));
30typedef char __v16qi __attribute__((__vector_size__(16)));
31
32/* Unsigned types */
33typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
34typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
35typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
36
37/* We need an explicitly signed variant for char. Note that this shouldn't
38 * appear in the interface though. */
39typedef signed char __v16qs __attribute__((__vector_size__(16)));
40
41#ifdef __SSE2__
42/* Both _Float16 and __bf16 require SSE2 being enabled. */
43typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
44typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
45typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));
46
47typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16)));
48typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
49#endif
50
51/* Define the default attributes for the functions in this file. */
52#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
53#define __DEFAULT_FN_ATTRS \
54 __attribute__((__always_inline__, __nodebug__, \
55 __target__("sse2,no-evex512"), __min_vector_width__(128)))
56#else
57#define __DEFAULT_FN_ATTRS \
58 __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
59 __min_vector_width__(128)))
60#endif
61
62#if defined(__cplusplus) && (__cplusplus >= 201103L)
63#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
64#else
65#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
66#endif
67
68#define __trunc64(x) \
69 (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
70#define __anyext128(x) \
71 (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
72 1, -1, -1)
73
74/// Adds lower double-precision values in both operands and returns the
75/// sum in the lower 64 bits of the result. The upper 64 bits of the result
76/// are copied from the upper double-precision value of the first operand.
77///
78/// \headerfile <x86intrin.h>
79///
80/// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction.
81///
82/// \param __a
83/// A 128-bit vector of [2 x double] containing one of the source operands.
84/// \param __b
85/// A 128-bit vector of [2 x double] containing one of the source operands.
86/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
87/// sum of the lower 64 bits of both operands. The upper 64 bits are copied
88/// from the upper 64 bits of the first source operand.
89static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a,
90 __m128d __b) {
91 __a[0] += __b[0];
92 return __a;
93}
94
95/// Adds two 128-bit vectors of [2 x double].
96///
97/// \headerfile <x86intrin.h>
98///
99/// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction.
100///
101/// \param __a
102/// A 128-bit vector of [2 x double] containing one of the source operands.
103/// \param __b
104/// A 128-bit vector of [2 x double] containing one of the source operands.
105/// \returns A 128-bit vector of [2 x double] containing the sums of both
106/// operands.
107static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_pd(__m128d __a,
108 __m128d __b) {
109 return (__m128d)((__v2df)__a + (__v2df)__b);
110}
111
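Note: a small usage example contrasting the scalar (_sd) and packed (_pd) forms documented above; the program is illustrative (compile with -msse2 on 32-bit x86; SSE2 is implicit on x86-64):

#include <emmintrin.h>
#include <stdio.h>

int main (void)
{
    __m128d a = _mm_set_pd (10.0, 1.0);  /* hi = 10, lo = 1 */
    __m128d b = _mm_set_pd (20.0, 2.0);  /* hi = 20, lo = 2 */
    double  sd[2], pd[2];

    _mm_storeu_pd (sd, _mm_add_sd (a, b)); /* low lane added, high copied from a */
    _mm_storeu_pd (pd, _mm_add_pd (a, b)); /* both lanes added                   */

    printf ("add_sd: %g %g\n", sd[0], sd[1]); /* 3 10 */
    printf ("add_pd: %g %g\n", pd[0], pd[1]); /* 3 30 */
    return 0;
}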
112/// Subtracts the lower double-precision value of the second operand
113/// from the lower double-precision value of the first operand and returns
114/// the difference in the lower 64 bits of the result. The upper 64 bits of
115/// the result are copied from the upper double-precision value of the first
116/// operand.
117///
118/// \headerfile <x86intrin.h>
119///
120/// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction.
121///
122/// \param __a
123/// A 128-bit vector of [2 x double] containing the minuend.
124/// \param __b
125/// A 128-bit vector of [2 x double] containing the subtrahend.
126/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
127/// difference of the lower 64 bits of both operands. The upper 64 bits are
128/// copied from the upper 64 bits of the first source operand.
129static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a,
130 __m128d __b) {
131 __a[0] -= __b[0];
132 return __a;
133}
134
135/// Subtracts two 128-bit vectors of [2 x double].
136///
137/// \headerfile <x86intrin.h>
138///
139/// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction.
140///
141/// \param __a
142/// A 128-bit vector of [2 x double] containing the minuend.
143/// \param __b
144/// A 128-bit vector of [2 x double] containing the subtrahend.
145/// \returns A 128-bit vector of [2 x double] containing the differences between
146/// both operands.
147static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_pd(__m128d __a,
148 __m128d __b) {
149 return (__m128d)((__v2df)__a - (__v2df)__b);
150}
151
152/// Multiplies lower double-precision values in both operands and returns
153/// the product in the lower 64 bits of the result. The upper 64 bits of the
154/// result are copied from the upper double-precision value of the first
155/// operand.
156///
157/// \headerfile <x86intrin.h>
158///
159/// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction.
160///
161/// \param __a
162/// A 128-bit vector of [2 x double] containing one of the source operands.
163/// \param __b
164/// A 128-bit vector of [2 x double] containing one of the source operands.
165/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
166/// product of the lower 64 bits of both operands. The upper 64 bits are
167/// copied from the upper 64 bits of the first source operand.
168static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a,
169 __m128d __b) {
170 __a[0] *= __b[0];
171 return __a;
172}
173
174/// Multiplies two 128-bit vectors of [2 x double].
175///
176/// \headerfile <x86intrin.h>
177///
178/// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction.
179///
180/// \param __a
181/// A 128-bit vector of [2 x double] containing one of the operands.
182/// \param __b
183/// A 128-bit vector of [2 x double] containing one of the operands.
184/// \returns A 128-bit vector of [2 x double] containing the products of both
185/// operands.
186static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_pd(__m128d __a,
187 __m128d __b) {
188 return (__m128d)((__v2df)__a * (__v2df)__b);
189}
190
191/// Divides the lower double-precision value of the first operand by the
192/// lower double-precision value of the second operand and returns the
193/// quotient in the lower 64 bits of the result. The upper 64 bits of the
194/// result are copied from the upper double-precision value of the first
195/// operand.
196///
197/// \headerfile <x86intrin.h>
198///
199/// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction.
200///
201/// \param __a
202/// A 128-bit vector of [2 x double] containing the dividend.
203/// \param __b
204/// A 128-bit vector of [2 x double] containing the divisor.
205/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
206/// quotient of the lower 64 bits of both operands. The upper 64 bits are
207/// copied from the upper 64 bits of the first source operand.
208static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a,
209 __m128d __b) {
210 __a[0] /= __b[0];
211 return __a;
212}
213
214/// Performs an element-by-element division of two 128-bit vectors of
215/// [2 x double].
216///
217/// \headerfile <x86intrin.h>
218///
219/// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction.
220///
221/// \param __a
222/// A 128-bit vector of [2 x double] containing the dividend.
223/// \param __b
224/// A 128-bit vector of [2 x double] containing the divisor.
225/// \returns A 128-bit vector of [2 x double] containing the quotients of both
226/// operands.
227static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_pd(__m128d __a,
228 __m128d __b) {
229 return (__m128d)((__v2df)__a / (__v2df)__b);
230}
231
232/// Calculates the square root of the lower double-precision value of
233/// the second operand and returns it in the lower 64 bits of the result.
234/// The upper 64 bits of the result are copied from the upper
235/// double-precision value of the first operand.
236///
237/// \headerfile <x86intrin.h>
238///
239/// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction.
240///
241/// \param __a
242/// A 128-bit vector of [2 x double] containing one of the operands. The
243/// upper 64 bits of this operand are copied to the upper 64 bits of the
244/// result.
245/// \param __b
246/// A 128-bit vector of [2 x double] containing one of the operands. The
247/// square root is calculated using the lower 64 bits of this operand.
248/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
249/// square root of the lower 64 bits of operand \a __b, and whose upper 64
250/// bits are copied from the upper 64 bits of operand \a __a.
251static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a,
252 __m128d __b) {
253 __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);
254 return __extension__(__m128d){__c[0], __a[1]};
255}
256
257/// Calculates the square root of each of the two values stored in a
258/// 128-bit vector of [2 x double].
259///
260/// \headerfile <x86intrin.h>
261///
262/// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction.
263///
264/// \param __a
265/// A 128-bit vector of [2 x double].
266/// \returns A 128-bit vector of [2 x double] containing the square roots of the
267/// values in the operand.
268static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) {
269 return __builtin_ia32_sqrtpd((__v2df)__a);
270}
271
272/// Compares lower 64-bit double-precision values of both operands, and
273/// returns the lesser of the pair of values in the lower 64-bits of the
274/// result. The upper 64 bits of the result are copied from the upper
275/// double-precision value of the first operand.
276///
277/// If either value in a comparison is NaN, returns the value from \a __b.
278///
279/// \headerfile <x86intrin.h>
280///
281/// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction.
282///
283/// \param __a
284/// A 128-bit vector of [2 x double] containing one of the operands. The
285/// lower 64 bits of this operand are used in the comparison.
286/// \param __b
287/// A 128-bit vector of [2 x double] containing one of the operands. The
288/// lower 64 bits of this operand are used in the comparison.
289/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
290/// minimum value between both operands. The upper 64 bits are copied from
291/// the upper 64 bits of the first source operand.
292static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a,
293 __m128d __b) {
294 return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
295}
296
297/// Performs element-by-element comparison of the two 128-bit vectors of
298/// [2 x double] and returns a vector containing the lesser of each pair of
299/// values.
300///
301/// If either value in a comparison is NaN, returns the value from \a __b.
302///
303/// \headerfile <x86intrin.h>
304///
305/// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction.
306///
307/// \param __a
308/// A 128-bit vector of [2 x double] containing one of the operands.
309/// \param __b
310/// A 128-bit vector of [2 x double] containing one of the operands.
311/// \returns A 128-bit vector of [2 x double] containing the minimum values
312/// between both operands.
313static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a,
314 __m128d __b) {
315 return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
316}
317
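Note: the NaN rule called out above (the result lane is taken from \a __b) makes min/max directional, so _mm_min_pd(a, b) and _mm_min_pd(b, a) can differ when NaNs are present. A quick demonstration:

#include <emmintrin.h>
#include <math.h>
#include <stdio.h>

int main (void)
{
    __m128d a = _mm_set_pd (NAN, 1.0);  /* hi = NaN, lo = 1.0 */
    __m128d b = _mm_set_pd (5.0, NAN);  /* hi = 5.0, lo = NaN */
    double  out[2];

    _mm_storeu_pd (out, _mm_min_pd (a, b));
    printf ("%g %g\n", out[0], out[1]); /* nan 5 -- each lane comes from b */
    return 0;
}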
318/// Compares lower 64-bit double-precision values of both operands, and
319/// returns the greater of the pair of values in the lower 64-bits of the
320/// result. The upper 64 bits of the result are copied from the upper
321/// double-precision value of the first operand.
322///
323/// If either value in a comparison is NaN, returns the value from \a __b.
324///
325/// \headerfile <x86intrin.h>
326///
327/// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction.
328///
329/// \param __a
330/// A 128-bit vector of [2 x double] containing one of the operands. The
331/// lower 64 bits of this operand are used in the comparison.
332/// \param __b
333/// A 128-bit vector of [2 x double] containing one of the operands. The
334/// lower 64 bits of this operand are used in the comparison.
335/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
336/// maximum value between both operands. The upper 64 bits are copied from
337/// the upper 64 bits of the first source operand.
338static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a,
339 __m128d __b) {
340 return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
341}
342
343/// Performs element-by-element comparison of the two 128-bit vectors of
344/// [2 x double] and returns a vector containing the greater of each pair
345/// of values.
346///
347/// If either value in a comparison is NaN, returns the value from \a __b.
348///
349/// \headerfile <x86intrin.h>
350///
351/// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction.
352///
353/// \param __a
354/// A 128-bit vector of [2 x double] containing one of the operands.
355/// \param __b
356/// A 128-bit vector of [2 x double] containing one of the operands.
357/// \returns A 128-bit vector of [2 x double] containing the maximum values
358/// between both operands.
359static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a,
360 __m128d __b) {
361 return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
362}
363
364/// Performs a bitwise AND of two 128-bit vectors of [2 x double].
365///
366/// \headerfile <x86intrin.h>
367///
368/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.
369///
370/// \param __a
371/// A 128-bit vector of [2 x double] containing one of the source operands.
372/// \param __b
373/// A 128-bit vector of [2 x double] containing one of the source operands.
374/// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
375/// values between both operands.
376static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_pd(__m128d __a,
377 __m128d __b) {
378 return (__m128d)((__v2du)__a & (__v2du)__b);
379}
380
381/// Performs a bitwise AND of two 128-bit vectors of [2 x double], using
382/// the one's complement of the values contained in the first source operand.
383///
384/// \headerfile <x86intrin.h>
385///
386/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.
387///
388/// \param __a
389/// A 128-bit vector of [2 x double] containing the left source operand. The
390/// one's complement of this value is used in the bitwise AND.
391/// \param __b
392/// A 128-bit vector of [2 x double] containing the right source operand.
393/// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the
394/// values in the second operand and the one's complement of the first
395/// operand.
396static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
397_mm_andnot_pd(__m128d __a, __m128d __b) {
398 return (__m128d)(~(__v2du)__a & (__v2du)__b);
399}
400
401/// Performs a bitwise OR of two 128-bit vectors of [2 x double].
402///
403/// \headerfile <x86intrin.h>
404///
405/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.
406///
407/// \param __a
408/// A 128-bit vector of [2 x double] containing one of the source operands.
409/// \param __b
410/// A 128-bit vector of [2 x double] containing one of the source operands.
411/// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the
412/// values between both operands.
413static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_or_pd(__m128d __a,
414 __m128d __b) {
415 return (__m128d)((__v2du)__a | (__v2du)__b);
416}
417
418/// Performs a bitwise XOR of two 128-bit vectors of [2 x double].
419///
420/// \headerfile <x86intrin.h>
421///
422/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.
423///
424/// \param __a
425/// A 128-bit vector of [2 x double] containing one of the source operands.
426/// \param __b
427/// A 128-bit vector of [2 x double] containing one of the source operands.
428/// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the
429/// values between both operands.
430static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_pd(__m128d __a,
431 __m128d __b) {
432 return (__m128d)((__v2du)__a ^ (__v2du)__b);
433}
434
435/// Compares each of the corresponding double-precision values of the
436/// 128-bit vectors of [2 x double] for equality.
437///
438/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
439/// If either value in a comparison is NaN, returns false.
440///
441/// \headerfile <x86intrin.h>
442///
443/// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction.
444///
445/// \param __a
446/// A 128-bit vector of [2 x double].
447/// \param __b
448/// A 128-bit vector of [2 x double].
449/// \returns A 128-bit vector containing the comparison results.
450static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a,
451 __m128d __b) {
452 return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
453}
454
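Note: the all-ones / all-zeros results described above are intended to be used as bit masks; a short example selecting the lanes of one vector where a comparison holds:

#include <emmintrin.h>
#include <stdio.h>

int main (void)
{
    __m128d a = _mm_set_pd (2.0, 1.0);  /* hi = 2, lo = 1 */
    __m128d b = _mm_set_pd (2.0, 3.0);  /* hi = 2, lo = 3 */
    double  out[2];

    __m128d m = _mm_cmpeq_pd (a, b);    /* per-lane all-ones or all-zeros */
    _mm_storeu_pd (out, _mm_and_pd (m, a)); /* keep lanes of a where equal */

    printf ("%g %g\n", out[0], out[1]); /* 0 2 */
    return 0;
}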
455/// Compares each of the corresponding double-precision values of the
456/// 128-bit vectors of [2 x double] to determine if the values in the first
457/// operand are less than those in the second operand.
458///
459/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
460/// If either value in a comparison is NaN, returns false.
461///
462/// \headerfile <x86intrin.h>
463///
464/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.
465///
466/// \param __a
467/// A 128-bit vector of [2 x double].
468/// \param __b
469/// A 128-bit vector of [2 x double].
470/// \returns A 128-bit vector containing the comparison results.
471static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a,
472 __m128d __b) {
473 return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
474}
475
476/// Compares each of the corresponding double-precision values of the
477/// 128-bit vectors of [2 x double] to determine if the values in the first
478/// operand are less than or equal to those in the second operand.
479///
480/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
481/// If either value in a comparison is NaN, returns false.
482///
483/// \headerfile <x86intrin.h>
484///
485/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.
486///
487/// \param __a
488/// A 128-bit vector of [2 x double].
489/// \param __b
490/// A 128-bit vector of [2 x double].
491/// \returns A 128-bit vector containing the comparison results.
492static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a,
493 __m128d __b) {
494 return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
495}
496
497/// Compares each of the corresponding double-precision values of the
498/// 128-bit vectors of [2 x double] to determine if the values in the first
499/// operand are greater than those in the second operand.
500///
501/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
502/// If either value in a comparison is NaN, returns false.
503///
504/// \headerfile <x86intrin.h>
505///
506/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.
507///
508/// \param __a
509/// A 128-bit vector of [2 x double].
510/// \param __b
511/// A 128-bit vector of [2 x double].
512/// \returns A 128-bit vector containing the comparison results.
513static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a,
514 __m128d __b) {
515 return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
516}
517
518/// Compares each of the corresponding double-precision values of the
519/// 128-bit vectors of [2 x double] to determine if the values in the first
520/// operand are greater than or equal to those in the second operand.
521///
522/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
523/// If either value in a comparison is NaN, returns false.
524///
525/// \headerfile <x86intrin.h>
526///
527/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.
528///
529/// \param __a
530/// A 128-bit vector of [2 x double].
531/// \param __b
532/// A 128-bit vector of [2 x double].
533/// \returns A 128-bit vector containing the comparison results.
534static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a,
535 __m128d __b) {
536 return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
537}
538
539/// Compares each of the corresponding double-precision values of the
540/// 128-bit vectors of [2 x double] to determine if the values in the first
541/// operand are ordered with respect to those in the second operand.
542///
543/// A pair of double-precision values are ordered with respect to each
544/// other if neither value is a NaN. Each comparison returns 0x0 for false,
545/// 0xFFFFFFFFFFFFFFFF for true.
546///
547/// \headerfile <x86intrin.h>
548///
549/// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction.
550///
551/// \param __a
552/// A 128-bit vector of [2 x double].
553/// \param __b
554/// A 128-bit vector of [2 x double].
555/// \returns A 128-bit vector containing the comparison results.
556static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a,
557 __m128d __b) {
558 return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
559}
560
561/// Compares each of the corresponding double-precision values of the
562/// 128-bit vectors of [2 x double] to determine if the values in the first
563/// operand are unordered with respect to those in the second operand.
564///
565/// A pair of double-precision values are unordered with respect to each
566/// other if one or both values are NaN. Each comparison returns 0x0 for
567/// false, 0xFFFFFFFFFFFFFFFF for true.
568///
569/// \headerfile <x86intrin.h>
570///
571/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>
572/// instruction.
573///
574/// \param __a
575/// A 128-bit vector of [2 x double].
576/// \param __b
577/// A 128-bit vector of [2 x double].
578/// \returns A 128-bit vector containing the comparison results.
579static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a,
580 __m128d __b) {
581 return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
582}
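
A short sketch of the ordered/unordered split (editor's example; assumes the C99 NAN macro from <math.h> and the _mm_movemask_pd intrinsic declared elsewhere in this header):

#include <emmintrin.h>
#include <math.h>
#include <stdio.h>

int main(void) {
  __m128d a = _mm_set_pd(NAN, 1.0);  /* lanes, low to high: {1.0, NaN} */
  __m128d b = _mm_set1_pd(2.0);
  /* bit i of the movemask reflects lane i of the comparison result */
  printf("ordered   mask = %d\n", _mm_movemask_pd(_mm_cmpord_pd(a, b)));   /* 1 */
  printf("unordered mask = %d\n", _mm_movemask_pd(_mm_cmpunord_pd(a, b))); /* 2 */
  return 0;
}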
583
584/// Compares each of the corresponding double-precision values of the
585/// 128-bit vectors of [2 x double] to determine if the values in the first
586/// operand are unequal to those in the second operand.
587///
588/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
589/// If either value in a comparison is NaN, returns true.
590///
591/// \headerfile <x86intrin.h>
592///
593/// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction.
594///
595/// \param __a
596/// A 128-bit vector of [2 x double].
597/// \param __b
598/// A 128-bit vector of [2 x double].
599/// \returns A 128-bit vector containing the comparison results.
600static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a,
601 __m128d __b) {
602 return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
603}
604
605/// Compares each of the corresponding double-precision values of the
606/// 128-bit vectors of [2 x double] to determine if the values in the first
607/// operand are not less than those in the second operand.
608///
609/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
610/// If either value in a comparison is NaN, returns true.
611///
612/// \headerfile <x86intrin.h>
613///
614/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.
615///
616/// \param __a
617/// A 128-bit vector of [2 x double].
618/// \param __b
619/// A 128-bit vector of [2 x double].
620/// \returns A 128-bit vector containing the comparison results.
621static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a,
622 __m128d __b) {
623 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
624}
625
626/// Compares each of the corresponding double-precision values of the
627/// 128-bit vectors of [2 x double] to determine if the values in the first
628/// operand are not less than or equal to those in the second operand.
629///
630/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
631/// If either value in a comparison is NaN, returns true.
632///
633/// \headerfile <x86intrin.h>
634///
635/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.
636///
637/// \param __a
638/// A 128-bit vector of [2 x double].
639/// \param __b
640/// A 128-bit vector of [2 x double].
641/// \returns A 128-bit vector containing the comparison results.
642static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a,
643 __m128d __b) {
644 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
645}
646
647/// Compares each of the corresponding double-precision values of the
648/// 128-bit vectors of [2 x double] to determine if the values in the first
649/// operand are not greater than those in the second operand.
650///
651/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
652/// If either value in a comparison is NaN, returns true.
653///
654/// \headerfile <x86intrin.h>
655///
656/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.
657///
658/// \param __a
659/// A 128-bit vector of [2 x double].
660/// \param __b
661/// A 128-bit vector of [2 x double].
662/// \returns A 128-bit vector containing the comparison results.
663static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a,
664 __m128d __b) {
665 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
666}
667
668/// Compares each of the corresponding double-precision values of the
669/// 128-bit vectors of [2 x double] to determine if the values in the first
670/// operand are not greater than or equal to those in the second operand.
671///
672/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
673/// If either value in a comparison is NaN, returns true.
674///
675/// \headerfile <x86intrin.h>
676///
677/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.
678///
679/// \param __a
680/// A 128-bit vector of [2 x double].
681/// \param __b
682/// A 128-bit vector of [2 x double].
683/// \returns A 128-bit vector containing the comparison results.
684static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a,
685 __m128d __b) {
686 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
687}
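
Because the negated forms return true on NaN, _mm_cmpnlt_pd is not interchangeable with _mm_cmpge_pd: they differ exactly on unordered lanes. A small sketch (editor's example, assuming <math.h> NAN):

#include <emmintrin.h>
#include <math.h>
#include <stdio.h>

int main(void) {
  __m128d a = _mm_set_pd(NAN, 1.0);  /* lanes, low to high: {1.0, NaN} */
  __m128d b = _mm_set1_pd(2.0);
  printf("ge  mask = %d\n", _mm_movemask_pd(_mm_cmpge_pd(a, b)));  /* 0: NaN lane is false */
  printf("nlt mask = %d\n", _mm_movemask_pd(_mm_cmpnlt_pd(a, b))); /* 2: NaN lane is true */
  return 0;
}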
688
689/// Compares the lower double-precision floating-point values in each of
690/// the two 128-bit floating-point vectors of [2 x double] for equality.
691///
692/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
693/// If either value in a comparison is NaN, returns false.
694///
695/// \headerfile <x86intrin.h>
696///
697/// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction.
698///
699/// \param __a
700/// A 128-bit vector of [2 x double]. The lower double-precision value is
701/// compared to the lower double-precision value of \a __b.
702/// \param __b
703/// A 128-bit vector of [2 x double]. The lower double-precision value is
704/// compared to the lower double-precision value of \a __a.
705/// \returns A 128-bit vector. The lower 64 bits contain the comparison
706/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
707static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a,
708 __m128d __b) {
709 return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
710}
711
712/// Compares the lower double-precision floating-point values in each of
713/// the two 128-bit floating-point vectors of [2 x double] to determine if
714/// the value in the first parameter is less than the corresponding value in
715/// the second parameter.
716///
717/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
718/// If either value in a comparison is NaN, returns false.
719///
720/// \headerfile <x86intrin.h>
721///
722/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.
723///
724/// \param __a
725/// A 128-bit vector of [2 x double]. The lower double-precision value is
726/// compared to the lower double-precision value of \a __b.
727/// \param __b
728/// A 128-bit vector of [2 x double]. The lower double-precision value is
729/// compared to the lower double-precision value of \a __a.
730/// \returns A 128-bit vector. The lower 64 bits contain the comparison
731/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
732static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a,
733 __m128d __b) {
734 return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
735}
736
737/// Compares the lower double-precision floating-point values in each of
738/// the two 128-bit floating-point vectors of [2 x double] to determine if
739/// the value in the first parameter is less than or equal to the
740/// corresponding value in the second parameter.
741///
742/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
743/// If either value in a comparison is NaN, returns false.
744///
745/// \headerfile <x86intrin.h>
746///
747/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.
748///
749/// \param __a
750/// A 128-bit vector of [2 x double]. The lower double-precision value is
751/// compared to the lower double-precision value of \a __b.
752/// \param __b
753/// A 128-bit vector of [2 x double]. The lower double-precision value is
754/// compared to the lower double-precision value of \a __a.
755/// \returns A 128-bit vector. The lower 64 bits contain the comparison
756/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
757static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a,
758 __m128d __b) {
759 return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
760}
761
762/// Compares the lower double-precision floating-point values in each of
763/// the two 128-bit floating-point vectors of [2 x double] to determine if
764/// the value in the first parameter is greater than the corresponding value
765/// in the second parameter.
766///
767/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
768/// If either value in a comparison is NaN, returns false.
769///
770/// \headerfile <x86intrin.h>
771///
772/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.
773///
774/// \param __a
775/// A 128-bit vector of [2 x double]. The lower double-precision value is
776/// compared to the lower double-precision value of \a __b.
777/// \param __b
778/// A 128-bit vector of [2 x double]. The lower double-precision value is
779/// compared to the lower double-precision value of \a __a.
780/// \returns A 128-bit vector. The lower 64 bits contain the comparison
781/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
782static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a,
783 __m128d __b) {
784 __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
785 return __extension__(__m128d){__c[0], __a[1]};
786}
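
A sketch of the lane behavior (editor's example): only lane 0 holds the mask, while lane 1 passes through from \a __a, which is why _mm_cmpgt_sd is built from the swapped cmpltsd plus an explicit rebuild of the upper lane.

#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  __m128d a = _mm_set_pd(42.0, 3.0); /* lanes, low to high: {3.0, 42.0} */
  __m128d b = _mm_set_pd(-1.0, 2.0); /* lanes, low to high: {2.0, -1.0} */
  __m128d r = _mm_cmpgt_sd(a, b);    /* lane 0: 3.0 > 2.0 -> all-ones */
  double out[2];
  _mm_storeu_pd(out, r);
  printf("upper lane: %f\n", out[1]); /* prints: upper lane: 42.000000 */
  return 0;
}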
787
788/// Compares the lower double-precision floating-point values in each of
789/// the two 128-bit floating-point vectors of [2 x double] to determine if
790/// the value in the first parameter is greater than or equal to the
791/// corresponding value in the second parameter.
792///
793/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
794/// If either value in a comparison is NaN, returns false.
795///
796/// \headerfile <x86intrin.h>
797///
798/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.
799///
800/// \param __a
801/// A 128-bit vector of [2 x double]. The lower double-precision value is
802/// compared to the lower double-precision value of \a __b.
803/// \param __b
804/// A 128-bit vector of [2 x double]. The lower double-precision value is
805/// compared to the lower double-precision value of \a __a.
806/// \returns A 128-bit vector. The lower 64 bits contain the comparison
807/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
808static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a,
809 __m128d __b) {
810 __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
811 return __extension__(__m128d){__c[0], __a[1]};
812}
813
814/// Compares the lower double-precision floating-point values in each of
815/// the two 128-bit floating-point vectors of [2 x double] to determine if
816/// the value in the first parameter is ordered with respect to the
817/// corresponding value in the second parameter.
818///
819/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair
820/// of double-precision values are ordered with respect to each other if
821/// neither value is a NaN.
822///
823/// \headerfile <x86intrin.h>
824///
825/// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction.
826///
827/// \param __a
828/// A 128-bit vector of [2 x double]. The lower double-precision value is
829/// compared to the lower double-precision value of \a __b.
830/// \param __b
831/// A 128-bit vector of [2 x double]. The lower double-precision value is
832/// compared to the lower double-precision value of \a __a.
833/// \returns A 128-bit vector. The lower 64 bits contain the comparison
834/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
835static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a,
836 __m128d __b) {
837 return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
838}
839
840/// Compares the lower double-precision floating-point values in each of
841/// the two 128-bit floating-point vectors of [2 x double] to determine if
842/// the value in the first parameter is unordered with respect to the
843/// corresponding value in the second parameter.
844///
845/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair
846/// of double-precision values are unordered with respect to each other if
847/// one or both values are NaN.
848///
849/// \headerfile <x86intrin.h>
850///
851/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>
852/// instruction.
853///
854/// \param __a
855/// A 128-bit vector of [2 x double]. The lower double-precision value is
856/// compared to the lower double-precision value of \a __b.
857/// \param __b
858/// A 128-bit vector of [2 x double]. The lower double-precision value is
859/// compared to the lower double-precision value of \a __a.
860/// \returns A 128-bit vector. The lower 64 bits contain the comparison
861/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
862static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a,
863 __m128d __b) {
864 return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
865}
866
867/// Compares the lower double-precision floating-point values in each of
868/// the two 128-bit floating-point vectors of [2 x double] to determine if
869/// the value in the first parameter is unequal to the corresponding value in
870/// the second parameter.
871///
872/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
873/// If either value in a comparison is NaN, returns true.
874///
875/// \headerfile <x86intrin.h>
876///
877/// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction.
878///
879/// \param __a
880/// A 128-bit vector of [2 x double]. The lower double-precision value is
881/// compared to the lower double-precision value of \a __b.
882/// \param __b
883/// A 128-bit vector of [2 x double]. The lower double-precision value is
884/// compared to the lower double-precision value of \a __a.
885/// \returns A 128-bit vector. The lower 64 bits contain the comparison
886/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
887static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a,
888 __m128d __b) {
889 return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
890}
891
892/// Compares the lower double-precision floating-point values in each of
893/// the two 128-bit floating-point vectors of [2 x double] to determine if
894/// the value in the first parameter is not less than the corresponding
895/// value in the second parameter.
896///
897/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
898/// If either value in a comparison is NaN, returns true.
899///
900/// \headerfile <x86intrin.h>
901///
902/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.
903///
904/// \param __a
905/// A 128-bit vector of [2 x double]. The lower double-precision value is
906/// compared to the lower double-precision value of \a __b.
907/// \param __b
908/// A 128-bit vector of [2 x double]. The lower double-precision value is
909/// compared to the lower double-precision value of \a __a.
910/// \returns A 128-bit vector. The lower 64 bits contain the comparison
911/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
912static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a,
913 __m128d __b) {
914 return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
915}
916
917/// Compares the lower double-precision floating-point values in each of
918/// the two 128-bit floating-point vectors of [2 x double] to determine if
919/// the value in the first parameter is not less than or equal to the
920/// corresponding value in the second parameter.
921///
922/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
923/// If either value in a comparison is NaN, returns true.
924///
925/// \headerfile <x86intrin.h>
926///
927/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.
928///
929/// \param __a
930/// A 128-bit vector of [2 x double]. The lower double-precision value is
931/// compared to the lower double-precision value of \a __b.
932/// \param __b
933/// A 128-bit vector of [2 x double]. The lower double-precision value is
934/// compared to the lower double-precision value of \a __a.
935/// \returns A 128-bit vector. The lower 64 bits contain the comparison
936/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
937static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a,
938 __m128d __b) {
939 return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
940}
941
942/// Compares the lower double-precision floating-point values in each of
943/// the two 128-bit floating-point vectors of [2 x double] to determine if
944/// the value in the first parameter is not greater than the corresponding
945/// value in the second parameter.
946///
947/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
948/// If either value in a comparison is NaN, returns true.
949///
950/// \headerfile <x86intrin.h>
951///
952/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.
953///
954/// \param __a
955/// A 128-bit vector of [2 x double]. The lower double-precision value is
956/// compared to the lower double-precision value of \a __b.
957/// \param __b
958/// A 128-bit vector of [2 x double]. The lower double-precision value is
959/// compared to the lower double-precision value of \a __a.
960/// \returns A 128-bit vector. The lower 64 bits contain the comparison
961/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
962static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a,
963 __m128d __b) {
964 __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
965 return __extension__(__m128d){__c[0], __a[1]};
966}
967
968/// Compares the lower double-precision floating-point values in each of
969/// the two 128-bit floating-point vectors of [2 x double] to determine if
970/// the value in the first parameter is not greater than or equal to the
971/// corresponding value in the second parameter.
972///
973/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
974/// If either value in a comparison is NaN, returns true.
975///
976/// \headerfile <x86intrin.h>
977///
978/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.
979///
980/// \param __a
981/// A 128-bit vector of [2 x double]. The lower double-precision value is
982/// compared to the lower double-precision value of \a __b.
983/// \param __b
984/// A 128-bit vector of [2 x double]. The lower double-precision value is
985/// compared to the lower double-precision value of \a __a.
986/// \returns A 128-bit vector. The lower 64 bits contain the comparison
987/// result. The upper 64 bits are copied from the upper 64 bits of \a __a.
988static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a,
989 __m128d __b) {
990 __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
991 return __extension__(__m128d){__c[0], __a[1]};
992}
993
994/// Compares the lower double-precision floating-point values in each of
995/// the two 128-bit floating-point vectors of [2 x double] for equality.
996///
997/// The comparison returns 0 for false, 1 for true. If either value in a
998/// comparison is NaN, returns 0.
999///
1000/// \headerfile <x86intrin.h>
1001///
1002/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1003///
1004/// \param __a
1005/// A 128-bit vector of [2 x double]. The lower double-precision value is
1006/// compared to the lower double-precision value of \a __b.
1007/// \param __b
1008/// A 128-bit vector of [2 x double]. The lower double-precision value is
1009/// compared to the lower double-precision value of \a __a.
1010/// \returns An integer containing the comparison results.
1011static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a,
1012 __m128d __b) {
1013 return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
1014}
1015
1016/// Compares the lower double-precision floating-point values in each of
1017/// the two 128-bit floating-point vectors of [2 x double] to determine if
1018/// the value in the first parameter is less than the corresponding value in
1019/// the second parameter.
1020///
1021/// The comparison returns 0 for false, 1 for true. If either value in a
1022/// comparison is NaN, returns 0.
1023///
1024/// \headerfile <x86intrin.h>
1025///
1026/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1027///
1028/// \param __a
1029/// A 128-bit vector of [2 x double]. The lower double-precision value is
1030/// compared to the lower double-precision value of \a __b.
1031/// \param __b
1032/// A 128-bit vector of [2 x double]. The lower double-precision value is
1033/// compared to the lower double-precision value of \a __a.
1034/// \returns An integer containing the comparison results.
1035static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a,
1036 __m128d __b) {
1037 return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
1038}
1039
1040/// Compares the lower double-precision floating-point values in each of
1041/// the two 128-bit floating-point vectors of [2 x double] to determine if
1042/// the value in the first parameter is less than or equal to the
1043/// corresponding value in the second parameter.
1044///
1045/// The comparison returns 0 for false, 1 for true. If either value in a
1046/// comparison is NaN, returns 0.
1047///
1048/// \headerfile <x86intrin.h>
1049///
1050/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1051///
1052/// \param __a
1053/// A 128-bit vector of [2 x double]. The lower double-precision value is
1054/// compared to the lower double-precision value of \a __b.
1055/// \param __b
1056/// A 128-bit vector of [2 x double]. The lower double-precision value is
1057/// compared to the lower double-precision value of \a __a.
1058/// \returns An integer containing the comparison results.
1059static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a,
1060 __m128d __b) {
1061 return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
1062}
1063
1064/// Compares the lower double-precision floating-point values in each of
1065/// the two 128-bit floating-point vectors of [2 x double] to determine if
1066/// the value in the first parameter is greater than the corresponding value
1067/// in the second parameter.
1068///
1069/// The comparison returns 0 for false, 1 for true. If either value in a
1070/// comparison is NaN, returns 0.
1071///
1072/// \headerfile <x86intrin.h>
1073///
1074/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1075///
1076/// \param __a
1077/// A 128-bit vector of [2 x double]. The lower double-precision value is
1078/// compared to the lower double-precision value of \a __b.
1079/// \param __b
1080/// A 128-bit vector of [2 x double]. The lower double-precision value is
1081/// compared to the lower double-precision value of \a __a.
1082/// \returns An integer containing the comparison results.
1083static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a,
1084 __m128d __b) {
1085 return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
1086}
1087
1088/// Compares the lower double-precision floating-point values in each of
1089/// the two 128-bit floating-point vectors of [2 x double] to determine if
1090/// the value in the first parameter is greater than or equal to the
1091/// corresponding value in the second parameter.
1092///
1093/// The comparison returns 0 for false, 1 for true. If either value in a
1094/// comparison is NaN, returns 0.
1095///
1096/// \headerfile <x86intrin.h>
1097///
1098/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1099///
1100/// \param __a
1101/// A 128-bit vector of [2 x double]. The lower double-precision value is
1102/// compared to the lower double-precision value of \a __b.
1103/// \param __b
1104/// A 128-bit vector of [2 x double]. The lower double-precision value is
1105/// compared to the lower double-precision value of \a __a.
1106/// \returns An integer containing the comparison results.
1107static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a,
1108 __m128d __b) {
1109 return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
1110}
1111
1112/// Compares the lower double-precision floating-point values in each of
1113/// the two 128-bit floating-point vectors of [2 x double] to determine if
1114/// the value in the first parameter is unequal to the corresponding value in
1115/// the second parameter.
1116///
1117/// The comparison returns 0 for false, 1 for true. If either value in a
1118/// comparison is NaN, returns 1.
1119///
1120/// \headerfile <x86intrin.h>
1121///
1122/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.
1123///
1124/// \param __a
1125/// A 128-bit vector of [2 x double]. The lower double-precision value is
1126/// compared to the lower double-precision value of \a __b.
1127/// \param __b
1128/// A 128-bit vector of [2 x double]. The lower double-precision value is
1129/// compared to the lower double-precision value of \a __a.
1130/// \returns An integer containing the comparison results.
1131static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a,
1132 __m128d __b) {
1133 return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
1134}
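
Unlike the cmp*_sd forms, the comi* family collapses the lane-0 comparison to a plain int, so it can feed an ordinary branch. A quick sketch (editor's example):

#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  __m128d a = _mm_set_sd(1.5);
  __m128d b = _mm_set_sd(2.5);
  if (_mm_comilt_sd(a, b))  /* returns 1: 1.5 < 2.5 */
    printf("a below b\n");
  return 0;
}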
1135
1136/// Compares the lower double-precision floating-point values in each of
1137/// the two 128-bit floating-point vectors of [2 x double] for equality.
1138///
1139/// The comparison returns 0 for false, 1 for true. If either value in a
1140/// comparison is NaN, returns 0.
1141///
1142/// \headerfile <x86intrin.h>
1143///
1144/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1145///
1146/// \param __a
1147/// A 128-bit vector of [2 x double]. The lower double-precision value is
1148/// compared to the lower double-precision value of \a __b.
1149/// \param __b
1150/// A 128-bit vector of [2 x double]. The lower double-precision value is
1151/// compared to the lower double-precision value of \a __a.
1152/// \returns An integer containing the comparison results.
1153static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a,
1154 __m128d __b) {
1155 return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
1156}
1157
1158/// Compares the lower double-precision floating-point values in each of
1159/// the two 128-bit floating-point vectors of [2 x double] to determine if
1160/// the value in the first parameter is less than the corresponding value in
1161/// the second parameter.
1162///
1163/// The comparison returns 0 for false, 1 for true. If either value in a
1164/// comparison is NaN, returns 0.
1165///
1166/// \headerfile <x86intrin.h>
1167///
1168/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1169///
1170/// \param __a
1171/// A 128-bit vector of [2 x double]. The lower double-precision value is
1172/// compared to the lower double-precision value of \a __b.
1173/// \param __b
1174/// A 128-bit vector of [2 x double]. The lower double-precision value is
1175/// compared to the lower double-precision value of \a __a.
1176/// \returns An integer containing the comparison results.
1177static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a,
1178 __m128d __b) {
1179 return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
1180}
1181
1182/// Compares the lower double-precision floating-point values in each of
1183/// the two 128-bit floating-point vectors of [2 x double] to determine if
1184/// the value in the first parameter is less than or equal to the
1185/// corresponding value in the second parameter.
1186///
1187/// The comparison returns 0 for false, 1 for true. If either value in a
1188/// comparison is NaN, returns 0.
1189///
1190/// \headerfile <x86intrin.h>
1191///
1192/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1193///
1194/// \param __a
1195/// A 128-bit vector of [2 x double]. The lower double-precision value is
1196/// compared to the lower double-precision value of \a __b.
1197/// \param __b
1198/// A 128-bit vector of [2 x double]. The lower double-precision value is
1199/// compared to the lower double-precision value of \a __a.
1200/// \returns An integer containing the comparison results.
1201static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a,
1202 __m128d __b) {
1203 return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
1204}
1205
1206/// Compares the lower double-precision floating-point values in each of
1207/// the two 128-bit floating-point vectors of [2 x double] to determine if
1208/// the value in the first parameter is greater than the corresponding value
1209/// in the second parameter.
1210///
1211/// The comparison returns 0 for false, 1 for true. If either value in a
1212/// comparison is NaN, returns 0.
1213///
1214/// \headerfile <x86intrin.h>
1215///
1216/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1217///
1218/// \param __a
1219/// A 128-bit vector of [2 x double]. The lower double-precision value is
1220/// compared to the lower double-precision value of \a __b.
1221/// \param __b
1222/// A 128-bit vector of [2 x double]. The lower double-precision value is
1223/// compared to the lower double-precision value of \a __a.
1224/// \returns An integer containing the comparison results.
1225static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a,
1226 __m128d __b) {
1227 return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
1228}
1229
1230/// Compares the lower double-precision floating-point values in each of
1231/// the two 128-bit floating-point vectors of [2 x double] to determine if
1232/// the value in the first parameter is greater than or equal to the
1233/// corresponding value in the second parameter.
1234///
1235/// The comparison returns 0 for false, 1 for true. If either value in a
1236/// comparison is NaN, returns 0.
1237///
1238/// \headerfile <x86intrin.h>
1239///
1240/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1241///
1242/// \param __a
1243/// A 128-bit vector of [2 x double]. The lower double-precision value is
1244/// compared to the lower double-precision value of \a __b.
1245/// \param __b
1246/// A 128-bit vector of [2 x double]. The lower double-precision value is
1247/// compared to the lower double-precision value of \a __a.
1248/// \returns An integer containing the comparison results.
1249static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a,
1250 __m128d __b) {
1251 return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
1252}
1253
1254/// Compares the lower double-precision floating-point values in each of
1255/// the two 128-bit floating-point vectors of [2 x double] to determine if
1256/// the value in the first parameter is unequal to the corresponding value in
1257/// the second parameter.
1258///
1259/// The comparison returns 0 for false, 1 for true. If either value in a
1260/// comparison is NaN, returns 1.
1261///
1262/// \headerfile <x86intrin.h>
1263///
1264/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.
1265///
1266/// \param __a
1267/// A 128-bit vector of [2 x double]. The lower double-precision value is
1268/// compared to the lower double-precision value of \a __b.
1269/// \param __b
1270/// A 128-bit vector of [2 x double]. The lower double-precision value is
1271/// compared to the lower double-precision value of \a __a.
1272/// \returns An integer containing the comparison result.
1273static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a,
1274 __m128d __b) {
1275 return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
1276}
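
With NaN operands, the return values follow the documented unordered rules above: the equality-style predicates report 0 and the inequality predicate reports 1. A quick sketch (editor's example; comi* and ucomi* return the same values and differ only in whether a quiet NaN raises the floating-point invalid exception):

#include <emmintrin.h>
#include <math.h>
#include <stdio.h>

int main(void) {
  __m128d a = _mm_set_sd(NAN);
  __m128d b = _mm_set_sd(1.0);
  printf("ucomieq  = %d\n", _mm_ucomieq_sd(a, b));  /* 0: NaN compares false */
  printf("ucomineq = %d\n", _mm_ucomineq_sd(a, b)); /* 1: NaN compares true */
  return 0;
}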
1277
1278/// Converts the two double-precision floating-point elements of a
1279/// 128-bit vector of [2 x double] into two single-precision floating-point
1280/// values, returned in the lower 64 bits of a 128-bit vector of [4 x float].
1281/// The upper 64 bits of the result vector are set to zero.
1282///
1283/// \headerfile <x86intrin.h>
1284///
1285/// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction.
1286///
1287/// \param __a
1288/// A 128-bit vector of [2 x double].
1289/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
1290/// converted values. The upper 64 bits are set to zero.
1291static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) {
1292 return __builtin_ia32_cvtpd2ps((__v2df)__a);
1293}
1294
1295/// Converts the lower two single-precision floating-point elements of a
1296/// 128-bit vector of [4 x float] into two double-precision floating-point
1297/// values, returned in a 128-bit vector of [2 x double]. The upper two
1298/// elements of the input vector are unused.
1299///
1300/// \headerfile <x86intrin.h>
1301///
1302/// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction.
1303///
1304/// \param __a
1305/// A 128-bit vector of [4 x float]. The lower two single-precision
1306/// floating-point elements are converted to double-precision values. The
1307/// upper two elements are unused.
1308/// \returns A 128-bit vector of [2 x double] containing the converted values.
1309static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
1310_mm_cvtps_pd(__m128 __a) {
1311 return (__m128d) __builtin_convertvector(
1312 __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
1313}
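
A round-trip sketch (editor's example): narrowing to float and widening back shows where precision is dropped.

#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  __m128d d = _mm_set_pd(2.0, 1.0 / 3.0);
  __m128 s = _mm_cvtpd_ps(d);   /* two floats in the low 64 bits */
  __m128d r = _mm_cvtps_pd(s);  /* widened back to double */
  double out[2];
  _mm_storeu_pd(out, r);
  printf("%.17g\n", out[0]);    /* 1/3 as rounded through float */
  return 0;
}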
1314
1315/// Converts the lower two integer elements of a 128-bit vector of
1316/// [4 x i32] into two double-precision floating-point values, returned in a
1317/// 128-bit vector of [2 x double].
1318///
1319/// The upper two elements of the input vector are unused.
1320///
1321/// \headerfile <x86intrin.h>
1322///
1323/// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction.
1324///
1325/// \param __a
1326/// A 128-bit integer vector of [4 x i32]. The lower two integer elements are
1327/// converted to double-precision values.
1328///
1329/// The upper two elements are unused.
1330/// \returns A 128-bit vector of [2 x double] containing the converted values.
1331static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
1332_mm_cvtepi32_pd(__m128i __a) {
1333 return (__m128d) __builtin_convertvector(
1334 __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
1335}
1336
1337/// Converts the two double-precision floating-point elements of a
1338/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
1339/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper
1340/// 64 bits of the result vector are set to zero.
1341///
1342/// If a converted value does not fit in a 32-bit integer, raises a
1343/// floating-point invalid exception. If the exception is masked, returns
1344/// the most negative integer.
1345///
1346/// \headerfile <x86intrin.h>
1347///
1348/// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction.
1349///
1350/// \param __a
1351/// A 128-bit vector of [2 x double].
1352/// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
1353/// converted values. The upper 64 bits are set to zero.
1354static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a) {
1355 return __builtin_ia32_cvtpd2dq((__v2df)__a);
1356}
1357
1358/// Converts the low-order element of a 128-bit vector of [2 x double]
1359/// into a 32-bit signed integer value.
1360///
1361/// If the converted value does not fit in a 32-bit integer, raises a
1362/// floating-point invalid exception. If the exception is masked, returns
1363/// the most negative integer.
1364///
1365/// \headerfile <x86intrin.h>
1366///
1367/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.
1368///
1369/// \param __a
1370/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
1371/// conversion.
1372/// \returns A 32-bit signed integer containing the converted value.
1373static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) {
1374 return __builtin_ia32_cvtsd2si((__v2df)__a);
1375}
1376
1377/// Converts the lower double-precision floating-point element of a
1378/// 128-bit vector of [2 x double], in the second parameter, into a
1379/// single-precision floating-point value, returned in the lower 32 bits of a
1380/// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are
1381/// copied from the upper 96 bits of the first parameter.
1382///
1383/// \headerfile <x86intrin.h>
1384///
1385/// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction.
1386///
1387/// \param __a
1388/// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are
1389/// copied to the upper 96 bits of the result.
1390/// \param __b
1391/// A 128-bit vector of [2 x double]. The lower double-precision
1392/// floating-point element is used in the conversion.
1393/// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
1394/// converted value from the second parameter. The upper 96 bits are copied
1395/// from the upper 96 bits of the first parameter.
1396static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a,
1397 __m128d __b) {
1398 return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
1399}
1400
1401/// Converts a 32-bit signed integer value, in the second parameter, into
1402/// a double-precision floating-point value, returned in the lower 64 bits of
1403/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector
1404/// are copied from the upper 64 bits of the first parameter.
1405///
1406/// \headerfile <x86intrin.h>
1407///
1408/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.
1409///
1410/// \param __a
1411/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are
1412/// copied to the upper 64 bits of the result.
1413/// \param __b
1414/// A 32-bit signed integer containing the value to be converted.
1415/// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the
1416/// converted value from the second parameter. The upper 64 bits are copied
1417/// from the upper 64 bits of the first parameter.
1418static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
1419_mm_cvtsi32_sd(__m128d __a, int __b) {
1420 __a[0] = __b;
1421 return __a;
1422}
1423
1424/// Converts the lower single-precision floating-point element of a
1425/// 128-bit vector of [4 x float], in the second parameter, into a
1426/// double-precision floating-point value, returned in the lower 64 bits of
1427/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector
1428/// are copied from the upper 64 bits of the first parameter.
1429///
1430/// \headerfile <x86intrin.h>
1431///
1432/// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction.
1433///
1434/// \param __a
1435/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are
1436/// copied to the upper 64 bits of the result.
1437/// \param __b
1438/// A 128-bit vector of [4 x float]. The lower single-precision
1439/// floating-point element is used in the conversion.
1440/// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the
1441/// converted value from the second parameter. The upper 64 bits are copied
1442/// from the upper 64 bits of the first parameter.
1443static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
1444_mm_cvtss_sd(__m128d __a, __m128 __b) {
1445 __a[0] = __b[0];
1446 return __a;
1447}
1448
1449/// Converts the two double-precision floating-point elements of a
1450/// 128-bit vector of [2 x double] into two signed truncated (rounded
1451/// toward zero) 32-bit integer values, returned in the lower 64 bits
1452/// of a 128-bit vector of [4 x i32].
1453///
1454/// If a converted value does not fit in a 32-bit integer, raises a
1455/// floating-point invalid exception. If the exception is masked, returns
1456/// the most negative integer.
1457///
1458/// \headerfile <x86intrin.h>
1459///
1460/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>
1461/// instruction.
1462///
1463/// \param __a
1464/// A 128-bit vector of [2 x double].
1465/// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the
1466/// converted values. The upper 64 bits are set to zero.
1467static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a) {
1468 return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);
1469}
1470
1471/// Converts the low-order element of a [2 x double] vector into a 32-bit
1472/// signed truncated (rounded toward zero) integer value.
1473///
1474/// If the converted value does not fit in a 32-bit integer, raises a
1475/// floating-point invalid exception. If the exception is masked, returns
1476/// the most negative integer.
1477///
1478/// \headerfile <x86intrin.h>
1479///
1480/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
1481/// instruction.
1482///
1483/// \param __a
1484/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
1485/// conversion.
1486/// \returns A 32-bit signed integer containing the converted value.
1487static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) {
1488 return __builtin_ia32_cvttsd2si((__v2df)__a);
1489}
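
The difference between the two scalar conversions in one sketch (editor's example): _mm_cvtsd_si32 honors the current rounding mode (round-to-nearest-even by default), while _mm_cvttsd_si32 always truncates toward zero.

#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  __m128d x = _mm_set_sd(2.7);
  printf("cvt  = %d\n", _mm_cvtsd_si32(x));  /* 3 under the default mode */
  printf("cvtt = %d\n", _mm_cvttsd_si32(x)); /* 2, truncated */
  return 0;
}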
1490
1491/// Converts the two double-precision floating-point elements of a
1492/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
1493/// returned in a 64-bit vector of [2 x i32].
1494///
1495/// If a converted value does not fit in a 32-bit integer, raises a
1496/// floating-point invalid exception. If the exception is masked, returns
1497/// the most negative integer.
1498///
1499/// \headerfile <x86intrin.h>
1500///
1501/// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction.
1502///
1503/// \param __a
1504/// A 128-bit vector of [2 x double].
1505/// \returns A 64-bit vector of [2 x i32] containing the converted values.
1506static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtpd_pi32(__m128d __a) {
1507 return __trunc64(__builtin_ia32_cvtpd2dq((__v2df)__a));
1508}
1509
1510/// Converts the two double-precision floating-point elements of a
1511/// 128-bit vector of [2 x double] into two signed truncated (rounded toward
1512/// zero) 32-bit integer values, returned in a 64-bit vector of [2 x i32].
1513///
1514/// If a converted value does not fit in a 32-bit integer, raises a
1515/// floating-point invalid exception. If the exception is masked, returns
1516/// the most negative integer.
1517///
1518/// \headerfile <x86intrin.h>
1519///
1520/// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction.
1521///
1522/// \param __a
1523/// A 128-bit vector of [2 x double].
1524/// \returns A 64-bit vector of [2 x i32] containing the converted values.
1525static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttpd_pi32(__m128d __a) {
1526 return __trunc64(__builtin_ia32_cvttpd2dq((__v2df)__a));
1527}
1528
1529/// Converts the two signed 32-bit integer elements of a 64-bit vector of
1530/// [2 x i32] into two double-precision floating-point values, returned in a
1531/// 128-bit vector of [2 x double].
1532///
1533/// \headerfile <x86intrin.h>
1534///
1535/// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction.
1536///
1537/// \param __a
1538/// A 64-bit vector of [2 x i32].
1539/// \returns A 128-bit vector of [2 x double] containing the converted values.
1540static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
1541_mm_cvtpi32_pd(__m64 __a) {
1542 return (__m128d) __builtin_convertvector((__v2si)__a, __v2df);
1543}
1544
1545/// Returns the low-order element of a 128-bit vector of [2 x double] as
1546/// a double-precision floating-point value.
1547///
1548/// \headerfile <x86intrin.h>
1549///
1550/// This intrinsic has no corresponding instruction.
1551///
1552/// \param __a
1553/// A 128-bit vector of [2 x double]. The lower 64 bits are returned.
1554/// \returns A double-precision floating-point value copied from the lower 64
1555/// bits of \a __a.
1556static __inline__ double __DEFAULT_FN_ATTRS_CONSTEXPR
1557_mm_cvtsd_f64(__m128d __a) {
1558 return __a[0];
1559}
1560
1561/// Loads a 128-bit floating-point vector of [2 x double] from an aligned
1562/// memory location.
1563///
1564/// \headerfile <x86intrin.h>
1565///
1566/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
1567///
1568/// \param __dp
1569/// A pointer to a 128-bit memory location. The address of the memory
1570/// location has to be 16-byte aligned.
1571/// \returns A 128-bit vector of [2 x double] containing the loaded values.
1572static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp) {
1573 return *(const __m128d *)__dp;
1574}
1575
1576/// Loads a double-precision floating-point value from a specified memory
1577/// location and duplicates it to both vector elements of a 128-bit vector of
1578/// [2 x double].
1579///
1580/// \headerfile <x86intrin.h>
1581///
1582/// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction.
1583///
1584/// \param __dp
1585/// A pointer to a memory location containing a double-precision value.
1586/// \returns A 128-bit vector of [2 x double] containing the loaded and
1587/// duplicated values.
1588static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp) {
1589 struct __mm_load1_pd_struct {
1590 double __u;
1591 } __attribute__((__packed__, __may_alias__));
1592 double __u = ((const struct __mm_load1_pd_struct *)__dp)->__u;
1593 return __extension__(__m128d){__u, __u};
1594}
1595
1596#define _mm_load_pd1(dp) _mm_load1_pd(dp)
1597
1598/// Loads two double-precision values, in reverse order, from an aligned
1599/// memory location into a 128-bit vector of [2 x double].
1600///
1601/// \headerfile <x86intrin.h>
1602///
1603/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +
1604/// needed shuffling instructions. In AVX mode, the shuffling may be combined
1605/// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction.
1606///
1607/// \param __dp
1608/// A 16-byte aligned pointer to an array of double-precision values to be
1609/// loaded in reverse order.
1610/// \returns A 128-bit vector of [2 x double] containing the reversed loaded
1611/// values.
1612static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp) {
1613 __m128d __u = *(const __m128d *)__dp;
1614 return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);
1615}
1616
1617/// Loads a 128-bit floating-point vector of [2 x double] from an
1618/// unaligned memory location.
1619///
1620/// \headerfile <x86intrin.h>
1621///
1622/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.
1623///
1624/// \param __dp
1625/// A pointer to a 128-bit memory location. The address of the memory
1626/// location does not have to be aligned.
1627/// \returns A 128-bit vector of [2 x double] containing the loaded values.
1628static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp) {
1629 struct __loadu_pd {
1630 __m128d_u __v;
1631 } __attribute__((__packed__, __may_alias__));
1632 return ((const struct __loadu_pd *)__dp)->__v;
1633}
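
The packed, may_alias wrapper struct is how the header expresses a type-punning load with no alignment assumption. From the caller's side the rule is simple (editor's sketch): use _mm_load_pd only for 16-byte-aligned addresses, _mm_loadu_pd otherwise.

#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  double buf[3] = {1.0, 2.0, 3.0};
  /* buf + 1 is only guaranteed 8-byte aligned, so take the unaligned path */
  __m128d v = _mm_loadu_pd(buf + 1);
  double out[2];
  _mm_storeu_pd(out, v);
  printf("%f %f\n", out[0], out[1]); /* prints: 2.000000 3.000000 */
  return 0;
}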
1634
1635/// Loads a 64-bit integer value to the low element of a 128-bit integer
1636/// vector and clears the upper element.
1637///
1638/// \headerfile <x86intrin.h>
1639///
1640/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
1641///
1642/// \param __a
1643/// A pointer to a 64-bit memory location. The address of the memory
1644/// location does not have to be aligned.
1645/// \returns A 128-bit vector of [2 x i64] containing the loaded value.
1646static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a) {
1647 struct __loadu_si64 {
1648 long long __v;
1649 } __attribute__((__packed__, __may_alias__));
1650 long long __u = ((const struct __loadu_si64 *)__a)->__v;
1651 return __extension__(__m128i)(__v2di){__u, 0LL};
1652}
1653
1654/// Loads a 32-bit integer value to the low element of a 128-bit integer
1655/// vector and clears the upper elements.
1656///
1657/// \headerfile <x86intrin.h>
1658///
1659/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
1660///
1661/// \param __a
1662/// A pointer to a 32-bit memory location. The address of the memory
1663/// location does not have to be aligned.
1664/// \returns A 128-bit vector of [4 x i32] containing the loaded value.
1665static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si32(void const *__a) {
1666 struct __loadu_si32 {
1667 int __v;
1668 } __attribute__((__packed__, __may_alias__));
1669 int __u = ((const struct __loadu_si32 *)__a)->__v;
1670 return __extension__(__m128i)(__v4si){__u, 0, 0, 0};
1671}
1672
1673/// Loads a 16-bit integer value to the low element of a 128-bit integer
1674/// vector and clears the upper elements.
1675///
1676/// \headerfile <x86intrin.h>
1677///
1678/// This intrinsic does not correspond to a specific instruction.
1679///
1680/// \param __a
1681/// A pointer to a 16-bit memory location. The address of the memory
1682/// location does not have to be aligned.
1683/// \returns A 128-bit vector of [8 x i16] containing the loaded value.
1684static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si16(void const *__a) {
1685 struct __loadu_si16 {
1686 short __v;
1687 } __attribute__((__packed__, __may_alias__));
1688 short __u = ((const struct __loadu_si16 *)__a)->__v;
1689 return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
1690}
1691
1692/// Loads a 64-bit double-precision value to the low element of a
1693/// 128-bit vector of [2 x double] and clears the upper element.
1694///
1695/// \headerfile <x86intrin.h>
1696///
1697/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
1698///
1699/// \param __dp
1700/// A pointer to a memory location containing a double-precision value.
1701/// The address of the memory location does not have to be aligned.
1702/// \returns A 128-bit vector of [2 x double] containing the loaded value.
1703static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp) {
1704 struct __mm_load_sd_struct {
1705 double __u;
1706 } __attribute__((__packed__, __may_alias__));
1707 double __u = ((const struct __mm_load_sd_struct *)__dp)->__u;
1708 return __extension__(__m128d){__u, 0};
1709}
1710
1711/// Loads a double-precision value into the high-order bits of a 128-bit
1712/// vector of [2 x double]. The low-order bits are copied from the low-order
1713/// bits of the first operand.
1714///
1715/// \headerfile <x86intrin.h>
1716///
1717/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
1718///
1719/// \param __a
1720/// A 128-bit vector of [2 x double]. \n
1721/// Bits [63:0] are written to bits [63:0] of the result.
1722/// \param __dp
1723/// A pointer to a 64-bit memory location containing a double-precision
1724/// floating-point value that is loaded. The loaded value is written to bits
1725/// [127:64] of the result. The address of the memory location does not have
1726/// to be aligned.
1727/// \returns A 128-bit vector of [2 x double] containing the moved values.
1728static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a,
1729 double const *__dp) {
1730 struct __mm_loadh_pd_struct {
1731 double __u;
1732 } __attribute__((__packed__, __may_alias__));
1733 double __u = ((const struct __mm_loadh_pd_struct *)__dp)->__u;
1734 return __extension__(__m128d){__a[0], __u};
1735}
1736
1737/// Loads a double-precision value into the low-order bits of a 128-bit
1738/// vector of [2 x double]. The high-order bits are copied from the
1739/// high-order bits of the first operand.
1740///
1741/// \headerfile <x86intrin.h>
1742///
1743/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
1744///
1745/// \param __a
1746/// A 128-bit vector of [2 x double]. \n
1747/// Bits [127:64] are written to bits [127:64] of the result.
1748/// \param __dp
1749/// A pointer to a 64-bit memory location containing a double-precision
1750/// floating-point value that is loaded. The loaded value is written to bits
1751/// [63:0] of the result. The address of the memory location does not have to
1752/// be aligned.
1753/// \returns A 128-bit vector of [2 x double] containing the moved values.
1754static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a,
1755 double const *__dp) {
1756 struct __mm_loadl_pd_struct {
1757 double __u;
1758 } __attribute__((__packed__, __may_alias__));
1759 double __u = ((const struct __mm_loadl_pd_struct *)__dp)->__u;
1760 return __extension__(__m128d){__u, __a[1]};
1761}
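
Together, _mm_loadl_pd and _mm_loadh_pd assemble a vector from two doubles at unrelated addresses (editor's sketch):

#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  double lo = 1.0, hi = 2.0;
  __m128d v = _mm_setzero_pd();
  v = _mm_loadl_pd(v, &lo); /* bits [63:0]   <- lo */
  v = _mm_loadh_pd(v, &hi); /* bits [127:64] <- hi */
  double out[2];
  _mm_storeu_pd(out, v);
  printf("%f %f\n", out[0], out[1]); /* prints: 1.000000 2.000000 */
  return 0;
}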
1762
1763/// Constructs a 128-bit floating-point vector of [2 x double] with
1764/// unspecified content. This could be used as an argument to another
1765/// intrinsic function where the argument is required but the value is not
1766/// actually used.
1767///
1768/// \headerfile <x86intrin.h>
1769///
1770/// This intrinsic has no corresponding instruction.
1771///
1772/// \returns A 128-bit floating-point vector of [2 x double] with unspecified
1773/// content.
1774static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void) {
1775 return (__m128d)__builtin_ia32_undef128();
1776}
1777
1778/// Constructs a 128-bit floating-point vector of [2 x double]. The lower
1779/// 64 bits of the vector are initialized with the specified double-precision
1780/// floating-point value. The upper 64 bits are set to zero.
1781///
1782/// \headerfile <x86intrin.h>
1783///
1784/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
1785///
1786/// \param __w
1787/// A double-precision floating-point value used to initialize the lower 64
1788/// bits of the result.
1789/// \returns An initialized 128-bit floating-point vector of [2 x double]. The
1790/// lower 64 bits contain the value of the parameter. The upper 64 bits are
1791/// set to zero.
1792static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_sd(double __w) {
1793 return __extension__(__m128d){__w, 0.0};
1794}
1795
1796/// Constructs a 128-bit floating-point vector of [2 x double], with each
1797/// of the two double-precision floating-point vector elements set to the
1798/// specified double-precision floating-point value.
1799///
1800/// \headerfile <x86intrin.h>
1801///
1802/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.
1803///
1804/// \param __w
1805/// A double-precision floating-point value used to initialize each vector
1806/// element of the result.
1807/// \returns An initialized 128-bit floating-point vector of [2 x double].
1808static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_pd(double __w) {
1809 return __extension__(__m128d){__w, __w};
1810}
1811
1812/// Constructs a 128-bit floating-point vector of [2 x double], with each
1813/// of the two double-precision floating-point vector elements set to the
1814/// specified double-precision floating-point value.
1815///
1816/// \headerfile <x86intrin.h>
1817///
1818/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.
1819///
1820/// \param __w
1821/// A double-precision floating-point value used to initialize each vector
1822/// element of the result.
1823/// \returns An initialized 128-bit floating-point vector of [2 x double].
1824static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_pd1(double __w) {
1825 return _mm_set1_pd(__w);
1826}
1827
1828/// Constructs a 128-bit floating-point vector of [2 x double]
1829/// initialized with the specified double-precision floating-point values.
1830///
1831/// \headerfile <x86intrin.h>
1832///
1833/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
1834///
1835/// \param __w
1836/// A double-precision floating-point value used to initialize the upper 64
1837/// bits of the result.
1838/// \param __x
1839/// A double-precision floating-point value used to initialize the lower 64
1840/// bits of the result.
1841/// \returns An initialized 128-bit floating-point vector of [2 x double].
1842static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_pd(double __w,
1843 double __x) {
1844 return __extension__(__m128d){__x, __w};
1845}
1846
1847/// Constructs a 128-bit floating-point vector of [2 x double],
1848/// initialized in reverse order with the specified double-precision
1849/// floating-point values.
1850///
1851/// \headerfile <x86intrin.h>
1852///
1853/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
1854///
1855/// \param __w
1856/// A double-precision floating-point value used to initialize the lower 64
1857/// bits of the result.
1858/// \param __x
1859/// A double-precision floating-point value used to initialize the upper 64
1860/// bits of the result.
1861/// \returns An initialized 128-bit floating-point vector of [2 x double].
1862static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setr_pd(double __w,
1863 double __x) {
1864 return __extension__(__m128d){__w, __x};
1865}
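/* Illustrative sketch, not part of the original header: _mm_set_pd takes the
   high element first, while _mm_setr_pd takes elements in memory order, so
   both calls below build the same vector (element 0 = 1.0). */
static __inline__ int __set_vs_setr_agree(void) {
  __m128d __v1 = _mm_set_pd(2.0, 1.0);  /* {1.0, 2.0} */
  __m128d __v2 = _mm_setr_pd(1.0, 2.0); /* {1.0, 2.0} */
  return __v1[0] == __v2[0] && __v1[1] == __v2[1]; /* evaluates to 1 */
}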
1866
1867/// Constructs a 128-bit floating-point vector of [2 x double]
1868/// initialized to zero.
1869///
1870/// \headerfile <x86intrin.h>
1871///
1872/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
1873///
1874/// \returns An initialized 128-bit floating-point vector of [2 x double] with
1875/// all elements set to zero.
1876static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void) {
1877 return __extension__(__m128d){0.0, 0.0};
1878}
1879
1880/// Constructs a 128-bit floating-point vector of [2 x double]. The lower
1881/// 64 bits are set to the lower 64 bits of the second parameter. The upper
1882/// 64 bits are set to the upper 64 bits of the first parameter.
1883///
1884/// \headerfile <x86intrin.h>
1885///
1886/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.
1887///
1888/// \param __a
1889/// A 128-bit vector of [2 x double]. The upper 64 bits are written to the
1890/// upper 64 bits of the result.
1891/// \param __b
1892/// A 128-bit vector of [2 x double]. The lower 64 bits are written to the
1893/// lower 64 bits of the result.
1894/// \returns A 128-bit vector of [2 x double] containing the moved values.
1895static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
1896_mm_move_sd(__m128d __a, __m128d __b) {
1897 __a[0] = __b[0];
1898 return __a;
1899}
1900
1901/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
1902/// memory location.
1903///
1904/// \headerfile <x86intrin.h>
1905///
1906/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
1907///
1908/// \param __dp
1909/// A pointer to a 64-bit memory location.
1910/// \param __a
1911/// A 128-bit vector of [2 x double] containing the value to be stored.
1912static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp,
1913 __m128d __a) {
1914 struct __mm_store_sd_struct {
1915 double __u;
1916 } __attribute__((__packed__, __may_alias__));
1917 ((struct __mm_store_sd_struct *)__dp)->__u = __a[0];
1918}
1919
1920/// Moves packed double-precision values from a 128-bit vector of
1921/// [2 x double] to a memory location.
1922///
1923/// \headerfile <x86intrin.h>
1924///
1925/// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction.
1926///
1927/// \param __dp
1928/// A pointer to an aligned memory location that can store two
1929/// double-precision values.
1930/// \param __a
1931/// A packed 128-bit vector of [2 x double] containing the values to be
1932/// moved.
1933static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp,
1934 __m128d __a) {
1935 *(__m128d *)__dp = __a;
1936}
1937
1938/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
1939/// the upper and lower 64 bits of a memory location.
1940///
1941/// \headerfile <x86intrin.h>
1942///
1943/// This intrinsic corresponds to the
1944/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
1945///
1946/// \param __dp
1947/// A pointer to a memory location that can store two double-precision
1948/// values.
1949/// \param __a
1950/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
1951/// of the values in \a __dp.
1952static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp,
1953 __m128d __a) {
1954 __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
1955 _mm_store_pd(__dp, __a);
1956}
1957
1958/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
1959/// the upper and lower 64 bits of a memory location.
1960///
1961/// \headerfile <x86intrin.h>
1962///
1963/// This intrinsic corresponds to the
1964/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
1965///
1966/// \param __dp
1967/// A pointer to a memory location that can store two double-precision
1968/// values.
1969/// \param __a
1970/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
1971/// of the values in \a __dp.
1972static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp,
1973 __m128d __a) {
1974 _mm_store1_pd(__dp, __a);
1975}
1976
1977/// Stores a 128-bit vector of [2 x double] into an unaligned memory
1978/// location.
1979///
1980/// \headerfile <x86intrin.h>
1981///
1982/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.
1983///
1984/// \param __dp
1985/// A pointer to a 128-bit memory location. The address of the memory
1986/// location does not have to be aligned.
1987/// \param __a
1988/// A 128-bit vector of [2 x double] containing the values to be stored.
1989static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp,
1990 __m128d __a) {
1991 struct __storeu_pd {
1992 __m128d_u __v;
1993 } __attribute__((__packed__, __may_alias__));
1994 ((struct __storeu_pd *)__dp)->__v = __a;
1995}
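/* Illustrative sketch, not part of the original header: _mm_store_pd requires
   a 16-byte aligned pointer while _mm_storeu_pd does not; this hypothetical
   helper picks the safe form for an arbitrary pointer. __UINTPTR_TYPE__ is
   the compiler's predefined uintptr_t type. */
static __inline__ void __store2_any(double *__p, __m128d __v) {
  if (((__UINTPTR_TYPE__)__p & 15) == 0)
    _mm_store_pd(__p, __v);  /* aligned fast path */
  else
    _mm_storeu_pd(__p, __v); /* unaligned fallback */
}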
1996
1997/// Stores two double-precision values, in reverse order, from a 128-bit
1998/// vector of [2 x double] to a 16-byte aligned memory location.
1999///
2000/// \headerfile <x86intrin.h>
2001///
2002/// This intrinsic corresponds to a shuffling instruction followed by a
2003/// <c> VMOVAPD / MOVAPD </c> instruction.
2004///
2005/// \param __dp
2006/// A pointer to a 16-byte aligned memory location that can store two
2007/// double-precision values.
2008/// \param __a
2009/// A 128-bit vector of [2 x double] containing the values to be reversed and
2010/// stored.
2011static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp,
2012 __m128d __a) {
2013 __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);
2014 *(__m128d *)__dp = __a;
2015}
2016
2017/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a
2018/// memory location.
2019///
2020/// \headerfile <x86intrin.h>
2021///
2022/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
2023///
2024/// \param __dp
2025/// A pointer to a 64-bit memory location.
2026/// \param __a
2027/// A 128-bit vector of [2 x double] containing the value to be stored.
2028static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp,
2029 __m128d __a) {
2030 struct __mm_storeh_pd_struct {
2031 double __u;
2032 } __attribute__((__packed__, __may_alias__));
2033 ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[1];
2034}
2035
2036/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
2037/// memory location.
2038///
2039/// \headerfile <x86intrin.h>
2040///
2041/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
2042///
2043/// \param __dp
2044/// A pointer to a 64-bit memory location.
2045/// \param __a
2046/// A 128-bit vector of [2 x double] containing the value to be stored.
2047static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp,
2048 __m128d __a) {
2049 struct __mm_storeh_pd_struct {
2050 double __u;
2051 } __attribute__((__packed__, __may_alias__));
2052 ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[0];
2053}
2054
2055/// Adds the corresponding elements of two 128-bit vectors of [16 x i8],
2056/// saving the lower 8 bits of each sum in the corresponding element of a
2057/// 128-bit result vector of [16 x i8].
2058///
2059/// The integer elements of both parameters can be either signed or unsigned.
2060///
2061/// \headerfile <x86intrin.h>
2062///
2063/// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction.
2064///
2065/// \param __a
2066/// A 128-bit vector of [16 x i8].
2067/// \param __b
2068/// A 128-bit vector of [16 x i8].
2069/// \returns A 128-bit vector of [16 x i8] containing the sums of both
2070/// parameters.
2071static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
2072 __m128i __b) {
2073 return (__m128i)((__v16qu)__a + (__v16qu)__b);
2074}
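/* Illustrative sketch (assumes the full header is included, since the integer
   set1 intrinsics are defined further down): byte addition wraps modulo 256,
   e.g. 0xFF + 0x01 yields 0x00 in every lane below. */
static __inline__ __m128i __wrapping_add_demo(void) {
  __m128i __x = _mm_set1_epi8((char)0xFF);
  __m128i __y = _mm_set1_epi8(1);
  return _mm_add_epi8(__x, __y); /* all lanes 0x00 */
}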
2075
2076/// Adds the corresponding elements of two 128-bit vectors of [8 x i16],
2077/// saving the lower 16 bits of each sum in the corresponding element of a
2078/// 128-bit result vector of [8 x i16].
2079///
2080/// The integer elements of both parameters can be either signed or unsigned.
2081///
2082/// \headerfile <x86intrin.h>
2083///
2084/// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction.
2085///
2086/// \param __a
2087/// A 128-bit vector of [8 x i16].
2088/// \param __b
2089/// A 128-bit vector of [8 x i16].
2090/// \returns A 128-bit vector of [8 x i16] containing the sums of both
2091/// parameters.
2092static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
2093 __m128i __b) {
2094 return (__m128i)((__v8hu)__a + (__v8hu)__b);
2095}
2096
2097/// Adds the corresponding elements of two 128-bit vectors of [4 x i32],
2098/// saving the lower 32 bits of each sum in the corresponding element of a
2099/// 128-bit result vector of [4 x i32].
2100///
2101/// The integer elements of both parameters can be either signed or unsigned.
2102///
2103/// \headerfile <x86intrin.h>
2104///
2105/// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction.
2106///
2107/// \param __a
2108/// A 128-bit vector of [4 x i32].
2109/// \param __b
2110/// A 128-bit vector of [4 x i32].
2111/// \returns A 128-bit vector of [4 x i32] containing the sums of both
2112/// parameters.
2113static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2114_mm_add_epi32(__m128i __a, __m128i __b) {
2115 return (__m128i)((__v4su)__a + (__v4su)__b);
2116}
2117
2118/// Adds two signed or unsigned 64-bit integer values, returning the
2119/// lower 64 bits of the sum.
2120///
2121/// \headerfile <x86intrin.h>
2122///
2123/// This intrinsic corresponds to the <c> PADDQ </c> instruction.
2124///
2125/// \param __a
2126/// A 64-bit integer.
2127/// \param __b
2128/// A 64-bit integer.
2129/// \returns A 64-bit integer containing the sum of both parameters.
2130static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) {
2131 return (__m64)(((unsigned long long)__a) + ((unsigned long long)__b));
2132}
2133
2134/// Adds the corresponding elements of two 128-bit vectors of [2 x i64],
2135/// saving the lower 64 bits of each sum in the corresponding element of a
2136/// 128-bit result vector of [2 x i64].
2137///
2138/// The integer elements of both parameters can be either signed or unsigned.
2139///
2140/// \headerfile <x86intrin.h>
2141///
2142/// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction.
2143///
2144/// \param __a
2145/// A 128-bit vector of [2 x i64].
2146/// \param __b
2147/// A 128-bit vector of [2 x i64].
2148/// \returns A 128-bit vector of [2 x i64] containing the sums of both
2149/// parameters.
2150static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2151_mm_add_epi64(__m128i __a, __m128i __b) {
2152 return (__m128i)((__v2du)__a + (__v2du)__b);
2153}
2154
2155/// Adds, with saturation, the corresponding elements of two 128-bit
2156/// signed [16 x i8] vectors, saving each sum in the corresponding element
2157/// of a 128-bit result vector of [16 x i8].
2158///
2159/// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
2160/// less than 0x80 are saturated to 0x80.
2161///
2162/// \headerfile <x86intrin.h>
2163///
2164/// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction.
2165///
2166/// \param __a
2167/// A 128-bit signed [16 x i8] vector.
2168/// \param __b
2169/// A 128-bit signed [16 x i8] vector.
2170/// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of
2171/// both parameters.
2172static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a,
2173 __m128i __b) {
2174 return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b);
2175}
2176
2177/// Adds, with saturation, the corresponding elements of two 128-bit
2178/// signed [8 x i16] vectors, saving each sum in the corresponding element
2179/// of a 128-bit result vector of [8 x i16].
2180///
2181/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
2182/// less than 0x8000 are saturated to 0x8000.
2183///
2184/// \headerfile <x86intrin.h>
2185///
2186/// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction.
2187///
2188/// \param __a
2189/// A 128-bit signed [8 x i16] vector.
2190/// \param __b
2191/// A 128-bit signed [8 x i16] vector.
2192/// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of
2193/// both parameters.
2194static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a,
2195 __m128i __b) {
2196 return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b);
2197}
2198
2199/// Adds, with saturation, the corresponding elements of two 128-bit
2200/// unsigned [16 x i8] vectors, saving each sum in the corresponding element
2201/// of a 128-bit result vector of [16 x i8].
2202///
2203/// Sums greater than 0xFF are saturated to 0xFF. Because the operands are
2204/// unsigned, sums cannot be negative and no lower saturation occurs.
2205///
2206/// \headerfile <x86intrin.h>
2207///
2208/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.
2209///
2210/// \param __a
2211/// A 128-bit unsigned [16 x i8] vector.
2212/// \param __b
2213/// A 128-bit unsigned [16 x i8] vector.
2214/// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums
2215/// of both parameters.
2216static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a,
2217 __m128i __b) {
2218 return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b);
2219}
2220
2221/// Adds, with saturation, the corresponding elements of two 128-bit
2222/// unsigned [8 x i16] vectors, saving each sum in the corresponding element
2223/// of a 128-bit result vector of [8 x i16].
2224///
2225/// Sums greater than 0xFFFF are saturated to 0xFFFF. Because the operands
2226/// are unsigned, sums cannot be negative and no lower saturation occurs.
2227///
2228/// \headerfile <x86intrin.h>
2229///
2230/// This intrinsic corresponds to the <c> VPADDUSW / PADDUSW </c> instruction.
2231///
2232/// \param __a
2233/// A 128-bit unsigned [8 x i16] vector.
2234/// \param __b
2235/// A 128-bit unsigned [8 x i16] vector.
2236/// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums
2237/// of both parameters.
2238static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a,
2239 __m128i __b) {
2240 return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b);
2241}
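/* Illustrative sketch (assumes the full header is included): saturating
   addition clamps instead of wrapping, e.g. 0xFFFF + 1 stays 0xFFFF, whereas
   _mm_add_epi16 would wrap the same lanes to 0x0000. */
static __inline__ __m128i __sat_vs_wrap_demo(void) {
  __m128i __max = _mm_set1_epi16((short)0xFFFF);
  __m128i __one = _mm_set1_epi16(1);
  return _mm_adds_epu16(__max, __one); /* all lanes remain 0xFFFF */
}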
2242
2243/// Computes the rounded averages of corresponding elements of two
2244/// 128-bit unsigned [16 x i8] vectors, saving each result in the
2245/// corresponding element of a 128-bit result vector of [16 x i8].
2246///
2247/// \headerfile <x86intrin.h>
2248///
2249/// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction.
2250///
2251/// \param __a
2252/// A 128-bit unsigned [16 x i8] vector.
2253/// \param __b
2254/// A 128-bit unsigned [16 x i8] vector.
2255/// \returns A 128-bit unsigned [16 x i8] vector containing the rounded
2256/// averages of both parameters.
2257static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a,
2258 __m128i __b) {
2259 return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
2260}
2261
2262/// Computes the rounded averages of corresponding elements of two
2263/// 128-bit unsigned [8 x i16] vectors, saving each result in the
2264/// corresponding element of a 128-bit result vector of [8 x i16].
2265///
2266/// \headerfile <x86intrin.h>
2267///
2268/// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction.
2269///
2270/// \param __a
2271/// A 128-bit unsigned [8 x i16] vector.
2272/// \param __b
2273/// A 128-bit unsigned [8 x i16] vector.
2274/// \returns A 128-bit unsigned [8 x i16] vector containing the rounded
2275/// averages of both parameters.
2276static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a,
2277 __m128i __b) {
2278 return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
2279}
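/* Illustrative sketch (assumes the full header is included): the rounded
   average computed per lane is (a + b + 1) >> 1 in full precision, so the
   average of 1 and 2 rounds up to 2 rather than truncating to 1. */
static __inline__ __m128i __avg_rounds_up_demo(void) {
  return _mm_avg_epu16(_mm_set1_epi16(1), _mm_set1_epi16(2)); /* all lanes 2 */
}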
2280
2281/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]
2282/// vectors, producing eight intermediate 32-bit signed integer products, and
2283/// adds the consecutive pairs of 32-bit products to form a 128-bit signed
2284/// [4 x i32] vector.
2285///
2286/// For example, bits [15:0] of both parameters are multiplied producing a
2287/// 32-bit product, bits [31:16] of both parameters are multiplied producing
2288/// a 32-bit product, and the sum of those two products becomes bits [31:0]
2289/// of the result.
2290///
2291/// \headerfile <x86intrin.h>
2292///
2293/// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction.
2294///
2295/// \param __a
2296/// A 128-bit signed [8 x i16] vector.
2297/// \param __b
2298/// A 128-bit signed [8 x i16] vector.
2299/// \returns A 128-bit signed [4 x i32] vector containing the sums of products
2300/// of both parameters.
2301static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a,
2302 __m128i __b) {
2303 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
2304}
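/* Illustrative sketch, not part of the original header: _mm_madd_epi16 is the
   core of a 16-bit dot product; a scalar reduction of the four 32-bit lanes
   finishes the sum. The helper name is hypothetical. */
static __inline__ int __dot8_i16(__m128i __x, __m128i __y) {
  __v4si __p = (__v4si)_mm_madd_epi16(__x, __y); /* four pairwise sums */
  return __p[0] + __p[1] + __p[2] + __p[3];      /* scalar reduction */
}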
2305
2306/// Compares corresponding elements of two 128-bit signed [8 x i16]
2307/// vectors, saving the greater value from each comparison in the
2308/// corresponding element of a 128-bit result vector of [8 x i16].
2309///
2310/// \headerfile <x86intrin.h>
2311///
2312/// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction.
2313///
2314/// \param __a
2315/// A 128-bit signed [8 x i16] vector.
2316/// \param __b
2317/// A 128-bit signed [8 x i16] vector.
2318/// \returns A 128-bit signed [8 x i16] vector containing the greater value of
2319/// each comparison.
2320static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a,
2321 __m128i __b) {
2322 return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b);
2323}
2324
2325/// Compares corresponding elements of two 128-bit unsigned [16 x i8]
2326/// vectors, saving the greater value from each comparison in the
2327/// corresponding element of a 128-bit result vector of [16 x i8].
2328///
2329/// \headerfile <x86intrin.h>
2330///
2331/// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction.
2332///
2333/// \param __a
2334/// A 128-bit unsigned [16 x i8] vector.
2335/// \param __b
2336/// A 128-bit unsigned [16 x i8] vector.
2337/// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of
2338/// each comparison.
2339static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a,
2340 __m128i __b) {
2341 return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b);
2342}
2343
2344/// Compares corresponding elements of two 128-bit signed [8 x i16]
2345/// vectors, saving the smaller value from each comparison in the
2346/// corresponding element of a 128-bit result vector of [8 x i16].
2347///
2348/// \headerfile <x86intrin.h>
2349///
2350/// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction.
2351///
2352/// \param __a
2353/// A 128-bit signed [8 x i16] vector.
2354/// \param __b
2355/// A 128-bit signed [8 x i16] vector.
2356/// \returns A 128-bit signed [8 x i16] vector containing the smaller value of
2357/// each comparison.
2358static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a,
2359 __m128i __b) {
2360 return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b);
2361}
2362
2363/// Compares corresponding elements of two 128-bit unsigned [16 x i8]
2364/// vectors, saving the smaller value from each comparison in the
2365/// corresponding element of a 128-bit result vector of [16 x i8].
2366///
2367/// \headerfile <x86intrin.h>
2368///
2369/// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction.
2370///
2371/// \param __a
2372/// A 128-bit unsigned [16 x i8] vector.
2373/// \param __b
2374/// A 128-bit unsigned [16 x i8] vector.
2375/// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of
2376/// each comparison.
2377static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a,
2378 __m128i __b) {
2379 return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b);
2380}
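/* Illustrative sketch, not part of the original header: clamp each unsigned
   byte of __v into [__lo, __hi] by combining the two element-wise operations
   above. The helper name is hypothetical. */
static __inline__ __m128i __clamp_epu8(__m128i __v, __m128i __lo,
                                       __m128i __hi) {
  return _mm_min_epu8(_mm_max_epu8(__v, __lo), __hi);
}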
2381
2382/// Multiplies the corresponding elements of two signed [8 x i16]
2383/// vectors, saving the upper 16 bits of each 32-bit product in the
2384/// corresponding element of a 128-bit signed [8 x i16] result vector.
2385///
2386/// \headerfile <x86intrin.h>
2387///
2388/// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction.
2389///
2390/// \param __a
2391/// A 128-bit signed [8 x i16] vector.
2392/// \param __b
2393/// A 128-bit signed [8 x i16] vector.
2394/// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of
2395/// each of the eight 32-bit products.
2396static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a,
2397 __m128i __b) {
2398 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
2399}
2400
2401/// Multiplies the corresponding elements of two unsigned [8 x i16]
2402/// vectors, saving the upper 16 bits of each 32-bit product in the
2403/// corresponding element of a 128-bit unsigned [8 x i16] result vector.
2404///
2405/// \headerfile <x86intrin.h>
2406///
2407/// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction.
2408///
2409/// \param __a
2410/// A 128-bit unsigned [8 x i16] vector.
2411/// \param __b
2412/// A 128-bit unsigned [8 x i16] vector.
2413/// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits
2414/// of each of the eight 32-bit products.
2415static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a,
2416 __m128i __b) {
2417 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
2418}
2419
2420/// Multiplies the corresponding elements of two signed [8 x i16]
2421/// vectors, saving the lower 16 bits of each 32-bit product in the
2422/// corresponding element of a 128-bit signed [8 x i16] result vector.
2423///
2424/// \headerfile <x86intrin.h>
2425///
2426/// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction.
2427///
2428/// \param __a
2429/// A 128-bit signed [8 x i16] vector.
2430/// \param __b
2431/// A 128-bit signed [8 x i16] vector.
2432/// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of
2433/// each of the eight 32-bit products.
2434static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a,
2435 __m128i __b) {
2436 return (__m128i)((__v8hu)__a * (__v8hu)__b);
2437}
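/* Illustrative sketch, not part of the original header: _mm_mulhi_epu16 and
   _mm_mullo_epi16 together recover the full 32-bit product of each unsigned
   16-bit pair; interleaving the low and high halves (the unpack intrinsics
   are defined later in this header) widens the lower four pairs. */
static __inline__ __m128i __widen_mul_lo4_epu16(__m128i __x, __m128i __y) {
  __m128i __lo = _mm_mullo_epi16(__x, __y); /* low 16 bits of each product */
  __m128i __hi = _mm_mulhi_epu16(__x, __y); /* high 16 bits of each product */
  return _mm_unpacklo_epi16(__lo, __hi);    /* four full 32-bit products */
}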
2438
2439/// Multiplies 32-bit unsigned integer values contained in the lower bits
2440/// of the two 64-bit integer vectors and returns the 64-bit unsigned
2441/// product.
2442///
2443/// \headerfile <x86intrin.h>
2444///
2445/// This intrinsic corresponds to the <c> PMULUDQ </c> instruction.
2446///
2447/// \param __a
2448/// A 64-bit integer containing one of the source operands.
2449/// \param __b
2450/// A 64-bit integer containing one of the source operands.
2451/// \returns A 64-bit integer vector containing the product of both operands.
2452static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
2453 return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a),
2454 (__v4si)__anyext128(__b)));
2455}
2456
2457/// Multiplies 32-bit unsigned integer values contained in the lower
2458/// bits of the corresponding elements of two [2 x i64] vectors, and returns
2459/// the 64-bit products in the corresponding elements of a [2 x i64] vector.
2460///
2461/// \headerfile <x86intrin.h>
2462///
2463/// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction.
2464///
2465/// \param __a
2466/// A [2 x i64] vector containing one of the source operands.
2467/// \param __b
2468/// A [2 x i64] vector containing one of the source operands.
2469/// \returns A [2 x i64] vector containing the product of both operands.
2470static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a,
2471 __m128i __b) {
2472 return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
2473}
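/* Illustrative sketch, not part of the original header: only the even 32-bit
   lanes (bits [31:0] and [95:64]) participate; the odd lanes are ignored. */
static __inline__ __m128i __square_even_lanes(__m128i __v) {
  return _mm_mul_epu32(__v, __v); /* {v0*v0, v2*v2} as two 64-bit lanes */
}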
2474
2475/// Computes the absolute differences of corresponding 8-bit integer
2476/// values in two 128-bit vectors. Sums the first 8 absolute differences, and
2477/// separately sums the second 8 absolute differences. Packs these two
2478/// unsigned 16-bit integer sums into the upper and lower elements of a
2479/// [2 x i64] vector.
2480///
2481/// \headerfile <x86intrin.h>
2482///
2483/// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction.
2484///
2485/// \param __a
2486/// A 128-bit integer vector containing one of the source operands.
2487/// \param __b
2488/// A 128-bit integer vector containing one of the source operands.
2489/// \returns A [2 x i64] vector containing the sums of the sets of absolute
2490/// differences between both operands.
2491static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a,
2492 __m128i __b) {
2493 return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
2494}
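/* Illustrative sketch, not part of the original header: total sum of absolute
   differences over 16 bytes, as used in block matching; the two partial sums
   occupy the low 16 bits of each 64-bit half of the result. */
static __inline__ int __sad16(__m128i __x, __m128i __y) {
  __v2du __s = (__v2du)_mm_sad_epu8(__x, __y);
  return (int)(__s[0] + __s[1]); /* fits in int: at most 16 * 255 */
}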
2495
2496/// Subtracts the corresponding 8-bit integer values in the operands.
2497///
2498/// \headerfile <x86intrin.h>
2499///
2500/// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction.
2501///
2502/// \param __a
2503/// A 128-bit integer vector containing the minuends.
2504/// \param __b
2505/// A 128-bit integer vector containing the subtrahends.
2506/// \returns A 128-bit integer vector containing the differences of the values
2507/// in the operands.
2508static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
2509 __m128i __b) {
2510 return (__m128i)((__v16qu)__a - (__v16qu)__b);
2511}
2512
2513/// Subtracts the corresponding 16-bit integer values in the operands.
2514///
2515/// \headerfile <x86intrin.h>
2516///
2517/// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction.
2518///
2519/// \param __a
2520/// A 128-bit integer vector containing the minuends.
2521/// \param __b
2522/// A 128-bit integer vector containing the subtrahends.
2523/// \returns A 128-bit integer vector containing the differences of the values
2524/// in the operands.
2525static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a,
2526 __m128i __b) {
2527 return (__m128i)((__v8hu)__a - (__v8hu)__b);
2528}
2529
2530/// Subtracts the corresponding 32-bit integer values in the operands.
2531///
2532/// \headerfile <x86intrin.h>
2533///
2534/// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction.
2535///
2536/// \param __a
2537/// A 128-bit integer vector containing the minuends.
2538/// \param __b
2539/// A 128-bit integer vector containing the subtrahends.
2540/// \returns A 128-bit integer vector containing the differences of the values
2541/// in the operands.
2542static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2543_mm_sub_epi32(__m128i __a, __m128i __b) {
2544 return (__m128i)((__v4su)__a - (__v4su)__b);
2545}
2546
2547/// Subtracts signed or unsigned 64-bit integer values and writes the
2548/// difference to the corresponding bits in the destination.
2549///
2550/// \headerfile <x86intrin.h>
2551///
2552/// This intrinsic corresponds to the <c> PSUBQ </c> instruction.
2553///
2554/// \param __a
2555/// A 64-bit integer vector containing the minuend.
2556/// \param __b
2557/// A 64-bit integer vector containing the subtrahend.
2558/// \returns A 64-bit integer vector containing the difference of the values in
2559/// the operands.
2560static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) {
2561 return (__m64)((unsigned long long)__a - (unsigned long long)__b);
2562}
2563
2564/// Subtracts the corresponding elements of two [2 x i64] vectors.
2565///
2566/// \headerfile <x86intrin.h>
2567///
2568/// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction.
2569///
2570/// \param __a
2571/// A 128-bit integer vector containing the minuends.
2572/// \param __b
2573/// A 128-bit integer vector containing the subtrahends.
2574/// \returns A 128-bit integer vector containing the differences of the values
2575/// in the operands.
2576static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2577_mm_sub_epi64(__m128i __a, __m128i __b) {
2578 return (__m128i)((__v2du)__a - (__v2du)__b);
2579}
2580
2581/// Subtracts, with saturation, corresponding 8-bit signed integer values in
2582/// the input and returns the differences in the corresponding bytes in the
2583/// destination.
2584///
2585/// Differences greater than 0x7F are saturated to 0x7F, and differences
2586/// less than 0x80 are saturated to 0x80.
2587///
2588/// \headerfile <x86intrin.h>
2589///
2590/// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction.
2591///
2592/// \param __a
2593/// A 128-bit integer vector containing the minuends.
2594/// \param __b
2595/// A 128-bit integer vector containing the subtrahends.
2596/// \returns A 128-bit integer vector containing the differences of the values
2597/// in the operands.
2598static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a,
2599 __m128i __b) {
2600 return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b);
2601}
2602
2603/// Subtracts, with saturation, corresponding 16-bit signed integer values in
2604/// the input and returns the differences in the corresponding bytes in the
2605/// destination.
2606///
2607/// Differences greater than 0x7FFF are saturated to 0x7FFF, and differences
2608/// less than 0x8000 are saturated to 0x8000.
2609///
2610/// \headerfile <x86intrin.h>
2611///
2612/// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction.
2613///
2614/// \param __a
2615/// A 128-bit integer vector containing the minuends.
2616/// \param __b
2617/// A 128-bit integer vector containing the subtrahends.
2618/// \returns A 128-bit integer vector containing the differences of the values
2619/// in the operands.
2620static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a,
2621 __m128i __b) {
2622 return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b);
2623}
2624
2625/// Subtracts, with saturation, corresponding 8-bit unsigned integer values in
2626/// the input and returns the differences in the corresponding bytes in the
2627/// destination.
2628///
2629/// Differences less than 0x00 are saturated to 0x00.
2630///
2631/// \headerfile <x86intrin.h>
2632///
2633/// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction.
2634///
2635/// \param __a
2636/// A 128-bit integer vector containing the minuends.
2637/// \param __b
2638/// A 128-bit integer vector containing the subtrahends.
2639/// \returns A 128-bit integer vector containing the unsigned integer
2640/// differences of the values in the operands.
2641static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a,
2642 __m128i __b) {
2643 return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b);
2644}
2645
2646/// Subtracts, with saturation, corresponding 16-bit unsigned integer values in
2647/// the input and returns the differences in the corresponding bytes in the
2648/// destination.
2649///
2650/// Differences less than 0x0000 are saturated to 0x0000.
2651///
2652/// \headerfile <x86intrin.h>
2653///
2654/// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction.
2655///
2656/// \param __a
2657/// A 128-bit integer vector containing the minuends.
2658/// \param __b
2659/// A 128-bit integer vector containing the subtrahends.
2660/// \returns A 128-bit integer vector containing the unsigned integer
2661/// differences of the values in the operands.
2662static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a,
2663 __m128i __b) {
2664 return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b);
2665}
2666
2667/// Performs a bitwise AND of two 128-bit integer vectors.
2668///
2669/// \headerfile <x86intrin.h>
2670///
2671/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.
2672///
2673/// \param __a
2674/// A 128-bit integer vector containing one of the source operands.
2675/// \param __b
2676/// A 128-bit integer vector containing one of the source operands.
2677/// \returns A 128-bit integer vector containing the bitwise AND of the values
2678/// in both operands.
2679static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a,
2680 __m128i __b) {
2681 return (__m128i)((__v2du)__a & (__v2du)__b);
2682}
2683
2684/// Performs a bitwise AND of two 128-bit integer vectors, using the
2685/// one's complement of the values contained in the first source operand.
2686///
2687/// \headerfile <x86intrin.h>
2688///
2689/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.
2690///
2691/// \param __a
2692/// A 128-bit vector containing the left source operand. The one's complement
2693/// of this value is used in the bitwise AND.
2694/// \param __b
2695/// A 128-bit vector containing the right source operand.
2696/// \returns A 128-bit integer vector containing the bitwise AND of the one's
2697/// complement of the first operand and the values in the second operand.
2698static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a,
2699 __m128i __b) {
2700 return (__m128i)(~(__v2du)__a & (__v2du)__b);
2701}
2702/// Performs a bitwise OR of two 128-bit integer vectors.
2703///
2704/// \headerfile <x86intrin.h>
2705///
2706/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.
2707///
2708/// \param __a
2709/// A 128-bit integer vector containing one of the source operands.
2710/// \param __b
2711/// A 128-bit integer vector containing one of the source operands.
2712/// \returns A 128-bit integer vector containing the bitwise OR of the values
2713/// in both operands.
2714static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a,
2715 __m128i __b) {
2716 return (__m128i)((__v2du)__a | (__v2du)__b);
2717}
2718
2719/// Performs a bitwise exclusive OR of two 128-bit integer vectors.
2720///
2721/// \headerfile <x86intrin.h>
2722///
2723/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.
2724///
2725/// \param __a
2726/// A 128-bit integer vector containing one of the source operands.
2727/// \param __b
2728/// A 128-bit integer vector containing one of the source operands.
2729/// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
2730/// values in both operands.
2731static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a,
2732 __m128i __b) {
2733 return (__m128i)((__v2du)__a ^ (__v2du)__b);
2734}
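/* Illustrative sketch, not part of the original header: the classic bitwise
   select idiom built from the operations above. Lanes where __mask bits are 1
   take __b; lanes where they are 0 take __a. The helper name is
   hypothetical. */
static __inline__ __m128i __select_si128(__m128i __mask, __m128i __a,
                                         __m128i __b) {
  return _mm_or_si128(_mm_and_si128(__mask, __b),
                      _mm_andnot_si128(__mask, __a));
}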
2735
2736/// Left-shifts the 128-bit integer vector operand by the specified
2737/// number of bytes. Low-order bits are cleared.
2738///
2739/// \headerfile <x86intrin.h>
2740///
2741/// \code
2742/// __m128i _mm_slli_si128(__m128i a, const int imm);
2743/// \endcode
2744///
2745/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.
2746///
2747/// \param a
2748/// A 128-bit integer vector containing the source operand.
2749/// \param imm
2750/// An immediate value specifying the number of bytes to left-shift operand
2751/// \a a.
2752/// \returns A 128-bit integer vector containing the left-shifted value.
2753#define _mm_slli_si128(a, imm) \
2754 ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \
2755 (int)(imm)))
2756
2757#define _mm_bslli_si128(a, imm) \
2758 ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \
2759 (int)(imm)))
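/* Illustrative sketch, not part of the original header: the shift count is in
   bytes and must be a compile-time constant. Shifting by 4 bytes moves each
   32-bit lane up one position and zero-fills lane 0. */
static __inline__ __m128i __shift_in_zero_lane(__m128i __v) {
  return _mm_slli_si128(__v, 4); /* {0, v0, v1, v2} as 32-bit lanes */
}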
2760
2761/// Left-shifts each 16-bit value in the 128-bit integer vector operand
2762/// by the specified number of bits. Low-order bits are cleared.
2763///
2764/// \headerfile <x86intrin.h>
2765///
2766/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
2767///
2768/// \param __a
2769/// A 128-bit integer vector containing the source operand.
2770/// \param __count
2771/// An integer value specifying the number of bits to left-shift each value
2772/// in operand \a __a.
2773/// \returns A 128-bit integer vector containing the left-shifted values.
2774static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a,
2775 int __count) {
2776 return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
2777}
2778
2779/// Left-shifts each 16-bit value in the 128-bit integer vector operand
2780/// by the specified number of bits. Low-order bits are cleared.
2781///
2782/// \headerfile <x86intrin.h>
2783///
2784/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
2785///
2786/// \param __a
2787/// A 128-bit integer vector containing the source operand.
2788/// \param __count
2789/// A 128-bit integer vector in which bits [63:0] specify the number of bits
2790/// to left-shift each value in operand \a __a.
2791/// \returns A 128-bit integer vector containing the left-shifted values.
2792static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a,
2793 __m128i __count) {
2794 return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
2795}
2796
2797/// Left-shifts each 32-bit value in the 128-bit integer vector operand
2798/// by the specified number of bits. Low-order bits are cleared.
2799///
2800/// \headerfile <x86intrin.h>
2801///
2802/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.
2803///
2804/// \param __a
2805/// A 128-bit integer vector containing the source operand.
2806/// \param __count
2807/// An integer value specifying the number of bits to left-shift each value
2808/// in operand \a __a.
2809/// \returns A 128-bit integer vector containing the left-shifted values.
2810static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a,
2811 int __count) {
2812 return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
2813}
2814
2815/// Left-shifts each 32-bit value in the 128-bit integer vector operand
2816/// by the specified number of bits. Low-order bits are cleared.
2817///
2818/// \headerfile <x86intrin.h>
2819///
2820/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.
2821///
2822/// \param __a
2823/// A 128-bit integer vector containing the source operand.
2824/// \param __count
2825/// A 128-bit integer vector in which bits [63:0] specify the number of bits
2826/// to left-shift each value in operand \a __a.
2827/// \returns A 128-bit integer vector containing the left-shifted values.
2828static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a,
2829 __m128i __count) {
2830 return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
2831}
2832
2833/// Left-shifts each 64-bit value in the 128-bit integer vector operand
2834/// by the specified number of bits. Low-order bits are cleared.
2835///
2836/// \headerfile <x86intrin.h>
2837///
2838/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.
2839///
2840/// \param __a
2841/// A 128-bit integer vector containing the source operand.
2842/// \param __count
2843/// An integer value specifying the number of bits to left-shift each value
2844/// in operand \a __a.
2845/// \returns A 128-bit integer vector containing the left-shifted values.
2846static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a,
2847 int __count) {
2848 return __builtin_ia32_psllqi128((__v2di)__a, __count);
2849}
2850
2851/// Left-shifts each 64-bit value in the 128-bit integer vector operand
2852/// by the specified number of bits. Low-order bits are cleared.
2853///
2854/// \headerfile <x86intrin.h>
2855///
2856/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.
2857///
2858/// \param __a
2859/// A 128-bit integer vector containing the source operand.
2860/// \param __count
2861/// A 128-bit integer vector in which bits [63:0] specify the number of bits
2862/// to left-shift each value in operand \a __a.
2863/// \returns A 128-bit integer vector containing the left-shifted values.
2864static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a,
2865 __m128i __count) {
2866 return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);
2867}
2868
2869/// Right-shifts each 16-bit value in the 128-bit integer vector operand
2870/// by the specified number of bits. High-order bits are filled with the sign
2871/// bit of the initial value.
2872///
2873/// \headerfile <x86intrin.h>
2874///
2875/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.
2876///
2877/// \param __a
2878/// A 128-bit integer vector containing the source operand.
2879/// \param __count
2880/// An integer value specifying the number of bits to right-shift each value
2881/// in operand \a __a.
2882/// \returns A 128-bit integer vector containing the right-shifted values.
2883static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a,
2884 int __count) {
2885 return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
2886}
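/* Illustrative sketch, not part of the original header: branchless absolute
   value of signed 16-bit lanes using the sign-filling shift above.
   m = x >> 15 is all ones for negative lanes, and |x| = (x ^ m) - m.
   (As with scalar two's-complement abs, -32768 wraps to itself.) */
static __inline__ __m128i __abs_epi16_sketch(__m128i __x) {
  __m128i __m = _mm_srai_epi16(__x, 15);
  return _mm_sub_epi16(_mm_xor_si128(__x, __m), __m);
}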
2887
2888/// Right-shifts each 16-bit value in the 128-bit integer vector operand
2889/// by the specified number of bits. High-order bits are filled with the sign
2890/// bit of the initial value.
2891///
2892/// \headerfile <x86intrin.h>
2893///
2894/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.
2895///
2896/// \param __a
2897/// A 128-bit integer vector containing the source operand.
2898/// \param __count
2899/// A 128-bit integer vector in which bits [63:0] specify the number of bits
2900/// to right-shift each value in operand \a __a.
2901/// \returns A 128-bit integer vector containing the right-shifted values.
2902static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a,
2903 __m128i __count) {
2904 return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
2905}
2906
2907/// Right-shifts each 32-bit value in the 128-bit integer vector operand
2908/// by the specified number of bits. High-order bits are filled with the sign
2909/// bit of the initial value.
2910///
2911/// \headerfile <x86intrin.h>
2912///
2913/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.
2914///
2915/// \param __a
2916/// A 128-bit integer vector containing the source operand.
2917/// \param __count
2918/// An integer value specifying the number of bits to right-shift each value
2919/// in operand \a __a.
2920/// \returns A 128-bit integer vector containing the right-shifted values.
2921static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a,
2922 int __count) {
2923 return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
2924}
2925
2926/// Right-shifts each 32-bit value in the 128-bit integer vector operand
2927/// by the specified number of bits. High-order bits are filled with the sign
2928/// bit of the initial value.
2929///
2930/// \headerfile <x86intrin.h>
2931///
2932/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.
2933///
2934/// \param __a
2935/// A 128-bit integer vector containing the source operand.
2936/// \param __count
2937/// A 128-bit integer vector in which bits [63:0] specify the number of bits
2938/// to right-shift each value in operand \a __a.
2939/// \returns A 128-bit integer vector containing the right-shifted values.
2940static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a,
2941 __m128i __count) {
2942 return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
2943}
2944
2945/// Right-shifts the 128-bit integer vector operand by the specified
2946/// number of bytes. High-order bits are cleared.
2947///
2948/// \headerfile <x86intrin.h>
2949///
2950/// \code
2951/// __m128i _mm_srli_si128(__m128i a, const int imm);
2952/// \endcode
2953///
2954/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.
2955///
2956/// \param a
2957/// A 128-bit integer vector containing the source operand.
2958/// \param imm
2959/// An immediate value specifying the number of bytes to right-shift operand
2960/// \a a.
2961/// \returns A 128-bit integer vector containing the right-shifted value.
2962#define _mm_srli_si128(a, imm) \
2963 ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \
2964 (int)(imm)))
2965
2966#define _mm_bsrli_si128(a, imm) \
2967 ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \
2968 (int)(imm)))
2969
2970/// Right-shifts each of the 16-bit values in the 128-bit integer vector
2971/// operand by the specified number of bits. High-order bits are cleared.
2972///
2973/// \headerfile <x86intrin.h>
2974///
2975/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.
2976///
2977/// \param __a
2978/// A 128-bit integer vector containing the source operand.
2979/// \param __count
2980/// An integer value specifying the number of bits to right-shift each value
2981/// in operand \a __a.
2982/// \returns A 128-bit integer vector containing the right-shifted values.
2983static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a,
2984 int __count) {
2985 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
2986}
2987
2988/// Right-shifts each of the 16-bit values in the 128-bit integer vector
2989/// operand by the specified number of bits. High-order bits are cleared.
2990///
2991/// \headerfile <x86intrin.h>
2992///
2993/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.
2994///
2995/// \param __a
2996/// A 128-bit integer vector containing the source operand.
2997/// \param __count
2998/// A 128-bit integer vector in which bits [63:0] specify the number of bits
2999/// to right-shift each value in operand \a __a.
3000/// \returns A 128-bit integer vector containing the right-shifted values.
3001static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a,
3002 __m128i __count) {
3003 return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
3004}
3005
3006/// Right-shifts each of the 32-bit values in the 128-bit integer vector
3007/// operand by the specified number of bits. High-order bits are cleared.
3008///
3009/// \headerfile <x86intrin.h>
3010///
3011/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.
3012///
3013/// \param __a
3014/// A 128-bit integer vector containing the source operand.
3015/// \param __count
3016/// An integer value specifying the number of bits to right-shift each value
3017/// in operand \a __a.
3018/// \returns A 128-bit integer vector containing the right-shifted values.
3019static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a,
3020 int __count) {
3021 return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
3022}
3023
3024/// Right-shifts each of the 32-bit values in the 128-bit integer vector
3025/// operand by the specified number of bits. High-order bits are cleared.
3026///
3027/// \headerfile <x86intrin.h>
3028///
3029/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.
3030///
3031/// \param __a
3032/// A 128-bit integer vector containing the source operand.
3033/// \param __count
3034/// A 128-bit integer vector in which bits [63:0] specify the number of bits
3035/// to right-shift each value in operand \a __a.
3036/// \returns A 128-bit integer vector containing the right-shifted values.
3037static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a,
3038 __m128i __count) {
3039 return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
3040}
3041
3042/// Right-shifts each of the 64-bit values in the 128-bit integer vector
3043/// operand by the specified number of bits. High-order bits are cleared.
3044///
3045/// \headerfile <x86intrin.h>
3046///
3047/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.
3048///
3049/// \param __a
3050/// A 128-bit integer vector containing the source operand.
3051/// \param __count
3052/// An integer value specifying the number of bits to right-shift each value
3053/// in operand \a __a.
3054/// \returns A 128-bit integer vector containing the right-shifted values.
3055static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a,
3056 int __count) {
3057 return __builtin_ia32_psrlqi128((__v2di)__a, __count);
3058}
3059
3060/// Right-shifts each of the 64-bit values in the 128-bit integer vector
3061/// operand by the specified number of bits. High-order bits are cleared.
3062///
3063/// \headerfile <x86intrin.h>
3064///
3065/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.
3066///
3067/// \param __a
3068/// A 128-bit integer vector containing the source operand.
3069/// \param __count
3070/// A 128-bit integer vector in which bits [63:0] specify the number of bits
3071/// to right-shift each value in operand \a __a.
3072/// \returns A 128-bit integer vector containing the right-shifted values.
3073static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a,
3074 __m128i __count) {
3075 return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
3076}
3077
3078/// Compares each of the corresponding 8-bit values of the 128-bit
3079/// integer vectors for equality.
3080///
3081/// Each comparison returns 0x0 for false, 0xFF for true.
3082///
3083/// \headerfile <x86intrin.h>
3084///
3085/// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction.
3086///
3087/// \param __a
3088/// A 128-bit integer vector.
3089/// \param __b
3090/// A 128-bit integer vector.
3091/// \returns A 128-bit integer vector containing the comparison results.
3092static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a,
3093 __m128i __b) {
3094 return (__m128i)((__v16qi)__a == (__v16qi)__b);
3095}
3096
3097/// Compares each of the corresponding 16-bit values of the 128-bit
3098/// integer vectors for equality.
3099///
3100/// Each comparison returns 0x0 for false, 0xFFFF for true.
3101///
3102/// \headerfile <x86intrin.h>
3103///
3104/// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction.
3105///
3106/// \param __a
3107/// A 128-bit integer vector.
3108/// \param __b
3109/// A 128-bit integer vector.
3110/// \returns A 128-bit integer vector containing the comparison results.
3111static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a,
3112 __m128i __b) {
3113 return (__m128i)((__v8hi)__a == (__v8hi)__b);
3114}
3115
3116/// Compares each of the corresponding 32-bit values of the 128-bit
3117/// integer vectors for equality.
3118///
3119/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
3120///
3121/// \headerfile <x86intrin.h>
3122///
3123/// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction.
3124///
3125/// \param __a
3126/// A 128-bit integer vector.
3127/// \param __b
3128/// A 128-bit integer vector.
3129/// \returns A 128-bit integer vector containing the comparison results.
3130static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a,
3131 __m128i __b) {
3132 return (__m128i)((__v4si)__a == (__v4si)__b);
3133}
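/* Illustrative sketch, not part of the original header: a comparison mask
   feeds the and/andnot/or select idiom shown earlier in this section, here
   replacing lanes of __v that equal __key with __repl. The helper name is
   hypothetical. */
static __inline__ __m128i __replace_matching_epi32(__m128i __v, __m128i __key,
                                                   __m128i __repl) {
  __m128i __m = _mm_cmpeq_epi32(__v, __key);
  return _mm_or_si128(_mm_and_si128(__m, __repl), _mm_andnot_si128(__m, __v));
}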
3134
3135/// Compares each of the corresponding signed 8-bit values of the 128-bit
3136/// integer vectors to determine if the values in the first operand are
3137/// greater than those in the second operand.
3138///
3139/// Each comparison returns 0x0 for false, 0xFF for true.
3140///
3141/// \headerfile <x86intrin.h>
3142///
3143/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.
3144///
3145/// \param __a
3146/// A 128-bit integer vector.
3147/// \param __b
3148/// A 128-bit integer vector.
3149/// \returns A 128-bit integer vector containing the comparison results.
3150static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a,
3151 __m128i __b) {
3152 /* This function always performs a signed comparison, but __v16qi is a char
3153 which may be signed or unsigned, so use __v16qs. */
3154 return (__m128i)((__v16qs)__a > (__v16qs)__b);
3155}
3156
3157/// Compares each of the corresponding signed 16-bit values of the
3158/// 128-bit integer vectors to determine if the values in the first operand
3159/// are greater than those in the second operand.
3160///
3161/// Each comparison returns 0x0 for false, 0xFFFF for true.
3162///
3163/// \headerfile <x86intrin.h>
3164///
3165/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.
3166///
3167/// \param __a
3168/// A 128-bit integer vector.
3169/// \param __b
3170/// A 128-bit integer vector.
3171/// \returns A 128-bit integer vector containing the comparison results.
3172static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a,
3173 __m128i __b) {
3174 return (__m128i)((__v8hi)__a > (__v8hi)__b);
3175}
3176
3177/// Compares each of the corresponding signed 32-bit values of the
3178/// 128-bit integer vectors to determine if the values in the first operand
3179/// are greater than those in the second operand.
3180///
3181/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
3182///
3183/// \headerfile <x86intrin.h>
3184///
3185/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.
3186///
3187/// \param __a
3188/// A 128-bit integer vector.
3189/// \param __b
3190/// A 128-bit integer vector.
3191/// \returns A 128-bit integer vector containing the comparison results.
3192static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a,
3193 __m128i __b) {
3194 return (__m128i)((__v4si)__a > (__v4si)__b);
3195}
3196
3197/// Compares each of the corresponding signed 8-bit values of the 128-bit
3198/// integer vectors to determine if the values in the first operand are less
3199/// than those in the second operand.
3200///
3201/// Each comparison returns 0x0 for false, 0xFF for true.
3202///
3203/// \headerfile <x86intrin.h>
3204///
3205/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.
3206///
3207/// \param __a
3208/// A 128-bit integer vector.
3209/// \param __b
3210/// A 128-bit integer vector.
3211/// \returns A 128-bit integer vector containing the comparison results.
3212static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a,
3213 __m128i __b) {
3214 return _mm_cmpgt_epi8(__b, __a);
3215}
3216
3217/// Compares each of the corresponding signed 16-bit values of the
3218/// 128-bit integer vectors to determine if the values in the first operand
3219/// are less than those in the second operand.
3220///
3221/// Each comparison returns 0x0 for false, 0xFFFF for true.
3222///
3223/// \headerfile <x86intrin.h>
3224///
3225/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.
3226///
3227/// \param __a
3228/// A 128-bit integer vector.
3229/// \param __b
3230/// A 128-bit integer vector.
3231/// \returns A 128-bit integer vector containing the comparison results.
3232static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a,
3233 __m128i __b) {
3234 return _mm_cmpgt_epi16(__b, __a);
3235}
3236
3237/// Compares each of the corresponding signed 32-bit values of the
3238/// 128-bit integer vectors to determine if the values in the first operand
3239/// are less than those in the second operand.
3240///
3241/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
3242///
3243/// \headerfile <x86intrin.h>
3244///
3245/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.
3246///
3247/// \param __a
3248/// A 128-bit integer vector.
3249/// \param __b
3250/// A 128-bit integer vector.
3251/// \returns A 128-bit integer vector containing the comparison results.
3252static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a,
3253 __m128i __b) {
3254 return _mm_cmpgt_epi32(__b, __a);
3255}
3256
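A minimal caller-side sketch (illustrative, not part of the header): the all-ones/all-zero masks these comparisons produce are typically combined with the bitwise intrinsics for branch-free selection.

#include <emmintrin.h>

/* Select the larger of each pair of signed 32-bit elements, branch-free. */
static __m128i max_epi32_sse2(__m128i a, __m128i b) {
  __m128i gt = _mm_cmpgt_epi32(a, b);           /* 0xFFFFFFFF where a > b */
  return _mm_or_si128(_mm_and_si128(gt, a),     /* keep a where a > b     */
                      _mm_andnot_si128(gt, b)); /* keep b elsewhere       */
}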
3257#ifdef __x86_64__
3258/// Converts a 64-bit signed integer value from the second operand into a
3259/// double-precision value and returns it in the lower element of a [2 x
3260/// double] vector; the upper element of the returned vector is copied from
3261/// the upper element of the first operand.
3262///
3263/// \headerfile <x86intrin.h>
3264///
3265/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.
3266///
3267/// \param __a
3268/// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are
3269/// copied to the upper 64 bits of the destination.
3270/// \param __b
3271/// A 64-bit signed integer operand containing the value to be converted.
3272/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
3273/// converted value of the second operand. The upper 64 bits are copied from
3274/// the upper 64 bits of the first operand.
3275static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
3276_mm_cvtsi64_sd(__m128d __a, long long __b) {
3277 __a[0] = __b;
3278 return __a;
3279}
3280
3281/// Converts the first (lower) element of a vector of [2 x double] into a
3282/// 64-bit signed integer value.
3283///
3284/// If the converted value does not fit in a 64-bit integer, raises a
3285/// floating-point invalid exception. If the exception is masked, returns
3286/// the most negative integer.
3287///
3288/// \headerfile <x86intrin.h>
3289///
3290/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.
3291///
3292/// \param __a
3293/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
3294/// conversion.
3295/// \returns A 64-bit signed integer containing the converted value.
3296static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsd_si64(__m128d __a) {
3297 return __builtin_ia32_cvtsd2si64((__v2df)__a);
3298}
3299
3300/// Converts the first (lower) element of a vector of [2 x double] into a
3301/// 64-bit signed truncated (rounded toward zero) integer value.
3302///
3303/// If a converted value does not fit in a 64-bit integer, raises a
3304/// floating-point invalid exception. If the exception is masked, returns
3305/// the most negative integer.
3306///
3307/// \headerfile <x86intrin.h>
3308///
3309/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
3310/// instruction.
3311///
3312/// \param __a
3313/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
3314/// conversion.
3315/// \returns A 64-bit signed integer containing the converted value.
3316static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) {
3317 return __builtin_ia32_cvttsd2si64((__v2df)__a);
3318}
3319#endif
3320
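A hedged sketch of the difference between the rounding and the truncating conversion above (64-bit targets only; the input value is hypothetical):

#include <emmintrin.h>

#ifdef __x86_64__
static void convert_demo(void) {
  __m128d v = _mm_set_sd(2.7);
  long long rounded   = _mm_cvtsd_si64(v);  /* 3 under round-to-nearest */
  long long truncated = _mm_cvttsd_si64(v); /* 2, always toward zero    */
  (void)rounded;
  (void)truncated;
}
#endif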
3321/// Converts a vector of [4 x i32] into a vector of [4 x float].
3322///
3323/// \headerfile <x86intrin.h>
3324///
3325/// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction.
3326///
3327/// \param __a
3328/// A 128-bit integer vector.
3329/// \returns A 128-bit vector of [4 x float] containing the converted values.
3330static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
3331_mm_cvtepi32_ps(__m128i __a) {
3332 return (__m128) __builtin_convertvector((__v4si)__a, __v4sf);
3333}
3334
3335/// Converts a vector of [4 x float] into a vector of [4 x i32].
3336///
3337/// If a converted value does not fit in a 32-bit integer, raises a
3338/// floating-point invalid exception. If the exception is masked, returns
3339/// the most negative integer.
3340///
3341/// \headerfile <x86intrin.h>
3342///
3343/// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction.
3344///
3345/// \param __a
3346/// A 128-bit vector of [4 x float].
3347/// \returns A 128-bit integer vector of [4 x i32] containing the converted
3348/// values.
3349static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a) {
3350 return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);
3351}
3352
3353/// Converts a vector of [4 x float] into four signed truncated (rounded toward
3354/// zero) 32-bit integers, returned in a vector of [4 x i32].
3355///
3356/// If a converted value does not fit in a 32-bit integer, raises a
3357/// floating-point invalid exception. If the exception is masked, returns
3358/// the most negative integer.
3359///
3360/// \headerfile <x86intrin.h>
3361///
3362/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>
3363/// instruction.
3364///
3365/// \param __a
3366/// A 128-bit vector of [4 x float].
3367/// \returns A 128-bit vector of [4 x i32] containing the converted values.
3368static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) {
3369 return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
3370}
3371
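The same rounding-versus-truncation contrast for packed floats, as a short sketch with hypothetical inputs (elements listed low to high):

#include <emmintrin.h>

static void float_to_int_demo(void) {
  __m128 f = _mm_setr_ps(1.5f, 0.5f, -0.5f, -1.5f);
  __m128i nearest = _mm_cvtps_epi32(f);  /* {2, 0, 0, -2}: round-to-nearest-even */
  __m128i trunc   = _mm_cvttps_epi32(f); /* {1, 0, 0, -1}: toward zero           */
  (void)nearest;
  (void)trunc;
}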
3372/// Returns a vector of [4 x i32] where the lowest element is the input
3373/// operand and the remaining elements are zero.
3374///
3375/// \headerfile <x86intrin.h>
3376///
3377/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
3378///
3379/// \param __a
3380/// A 32-bit signed integer operand.
3381/// \returns A 128-bit vector of [4 x i32].
3382static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) {
3383 return __extension__(__m128i)(__v4si){__a, 0, 0, 0};
3384}
3385
3386/// Returns a vector of [2 x i64] where the lower element is the input
3387/// operand and the upper element is zero.
3388///
3389/// \headerfile <x86intrin.h>
3390///
3391/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction
3392/// in 64-bit mode.
3393///
3394/// \param __a
3395/// A 64-bit signed integer operand containing the value to be converted.
3396/// \returns A 128-bit vector of [2 x i64] containing the converted value.
3397static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) {
3398 return __extension__(__m128i)(__v2di){__a, 0};
3399}
3400
3401/// Moves the least significant 32 bits of a vector of [4 x i32] to a
3402/// 32-bit signed integer value.
3403///
3404/// \headerfile <x86intrin.h>
3405///
3406/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
3407///
3408/// \param __a
3409/// A vector of [4 x i32]. The least significant 32 bits are moved to the
3410/// destination.
3411/// \returns A 32-bit signed integer containing the moved value.
3412static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) {
3413 __v4si __b = (__v4si)__a;
3414 return __b[0];
3415}
3416
3417/// Moves the least significant 64 bits of a vector of [2 x i64] to a
3418/// 64-bit signed integer value.
3419///
3420/// \headerfile <x86intrin.h>
3421///
3422/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
3423///
3424/// \param __a
3425/// A vector of [2 x i64]. The least significant 64 bits are moved to the
3426/// destination.
3427/// \returns A 64-bit signed integer containing the moved value.
3428static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) {
3429 return __a[0];
3430}
3431
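A trivial round trip through the low lane, tying the two directions together (value is hypothetical):

#include <emmintrin.h>

static int scalar_roundtrip(void) {
  __m128i v = _mm_cvtsi32_si128(42); /* {42, 0, 0, 0} */
  return _mm_cvtsi128_si32(v);       /* 42            */
}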
3432/// Moves packed integer values from an aligned 128-bit memory location
3433/// to elements in a 128-bit integer vector.
3434///
3435/// \headerfile <x86intrin.h>
3436///
3437/// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction.
3438///
3439/// \param __p
3440/// An aligned pointer to a memory location containing integer values.
3441/// \returns A 128-bit integer vector containing the moved values.
3442static __inline__ __m128i __DEFAULT_FN_ATTRS
3443_mm_load_si128(__m128i const *__p) {
3444 return *__p;
3445}
3446
3447/// Moves packed integer values from an unaligned 128-bit memory location
3448/// to elements in a 128-bit integer vector.
3449///
3450/// \headerfile <x86intrin.h>
3451///
3452/// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction.
3453///
3454/// \param __p
3455/// A pointer to a memory location containing integer values.
3456/// \returns A 128-bit integer vector containing the moved values.
3457static __inline__ __m128i __DEFAULT_FN_ATTRS
3458_mm_loadu_si128(__m128i_u const *__p) {
3459 struct __loadu_si128 {
3460 __m128i_u __v;
3461 } __attribute__((__packed__, __may_alias__));
3462 return ((const struct __loadu_si128 *)__p)->__v;
Step 14: Access to field '__v' results in a dereference of a null pointer (loaded from variable '__p')
3463}
3464
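The warning above fires because _mm_loadu_si128 performs no null check; in the flagged pixman call path the analyzer believes __p can be null. A hedged caller-side guard (names are illustrative, not pixman's):

#include <emmintrin.h>

static __m128i load_or_zero(const __m128i_u *p) {
  /* Dereference only once the pointer is known to be non-null. */
  return p ? _mm_loadu_si128(p) : _mm_setzero_si128();
}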
3465/// Returns a vector of [2 x i64] where the lower element is taken from
3466/// the lower element of the operand, and the upper element is zero.
3467///
3468/// \headerfile <x86intrin.h>
3469///
3470/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
3471///
3472/// \param __p
3473/// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of
3474/// the destination.
3475/// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the
3476/// moved value. The higher order bits are cleared.
3477static __inline__ __m128i __DEFAULT_FN_ATTRS
3478_mm_loadl_epi64(__m128i_u const *__p) {
3479 struct __mm_loadl_epi64_struct {
3480 long long __u;
3481 } __attribute__((__packed__, __may_alias__));
3482 return __extension__(__m128i){
3483 ((const struct __mm_loadl_epi64_struct *)__p)->__u, 0};
3484}
3485
3486/// Generates a 128-bit vector of [4 x i32] with unspecified content.
3487/// This could be used as an argument to another intrinsic function where the
3488/// argument is required but the value is not actually used.
3489///
3490/// \headerfile <x86intrin.h>
3491///
3492/// This intrinsic has no corresponding instruction.
3493///
3494/// \returns A 128-bit vector of [4 x i32] with unspecified content.
3495static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) {
3496 return (__m128i)__builtin_ia32_undef128();
3497}
3498
3499/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
3500/// the specified 64-bit integer values.
3501///
3502/// \headerfile <x86intrin.h>
3503///
3504/// This intrinsic is a utility function and does not correspond to a specific
3505/// instruction.
3506///
3507/// \param __q1
3508/// A 64-bit integer value used to initialize the upper 64 bits of the
3509/// destination vector of [2 x i64].
3510/// \param __q0
3511/// A 64-bit integer value used to initialize the lower 64 bits of the
3512/// destination vector of [2 x i64].
3513/// \returns An initialized 128-bit vector of [2 x i64] containing the values
3514/// provided in the operands.
3515static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3516_mm_set_epi64x(long long __q1, long long __q0) {
3517 return __extension__(__m128i)(__v2di){__q0, __q1};
3518}
3519
3520/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
3521/// the specified 64-bit integer values.
3522///
3523/// \headerfile <x86intrin.h>
3524///
3525/// This intrinsic is a utility function and does not correspond to a specific
3526/// instruction.
3527///
3528/// \param __q1
3529/// A 64-bit integer value used to initialize the upper 64 bits of the
3530/// destination vector of [2 x i64].
3531/// \param __q0
3532/// A 64-bit integer value used to initialize the lower 64 bits of the
3533/// destination vector of [2 x i64].
3534/// \returns An initialized 128-bit vector of [2 x i64] containing the values
3535/// provided in the operands.
3536static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3537_mm_set_epi64(__m64 __q1, __m64 __q0) {
3538 return _mm_set_epi64x((long long)__q1[0], (long long)__q0[0]);
3539}
3540
3541/// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
3542/// the specified 32-bit integer values.
3543///
3544/// \headerfile <x86intrin.h>
3545///
3546/// This intrinsic is a utility function and does not correspond to a specific
3547/// instruction.
3548///
3549/// \param __i3
3550/// A 32-bit integer value used to initialize bits [127:96] of the
3551/// destination vector.
3552/// \param __i2
3553/// A 32-bit integer value used to initialize bits [95:64] of the destination
3554/// vector.
3555/// \param __i1
3556/// A 32-bit integer value used to initialize bits [63:32] of the destination
3557/// vector.
3558/// \param __i0
3559/// A 32-bit integer value used to initialize bits [31:0] of the destination
3560/// vector.
3561/// \returns An initialized 128-bit vector of [4 x i32] containing the values
3562/// provided in the operands.
3563static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi32(int __i3,
3564 int __i2,
3565 int __i1,
3566 int __i0) {
3567 return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3};
3568}
3569
3570/// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with
3571/// the specified 16-bit integer values.
3572///
3573/// \headerfile <x86intrin.h>
3574///
3575/// This intrinsic is a utility function and does not correspond to a specific
3576/// instruction.
3577///
3578/// \param __w7
3579/// A 16-bit integer value used to initialize bits [127:112] of the
3580/// destination vector.
3581/// \param __w6
3582/// A 16-bit integer value used to initialize bits [111:96] of the
3583/// destination vector.
3584/// \param __w5
3585/// A 16-bit integer value used to initialize bits [95:80] of the destination
3586/// vector.
3587/// \param __w4
3588/// A 16-bit integer value used to initialize bits [79:64] of the destination
3589/// vector.
3590/// \param __w3
3591/// A 16-bit integer value used to initialize bits [63:48] of the destination
3592/// vector.
3593/// \param __w2
3594/// A 16-bit integer value used to initialize bits [47:32] of the destination
3595/// vector.
3596/// \param __w1
3597/// A 16-bit integer value used to initialize bits [31:16] of the destination
3598/// vector.
3599/// \param __w0
3600/// A 16-bit integer value used to initialize bits [15:0] of the destination
3601/// vector.
3602/// \returns An initialized 128-bit vector of [8 x i16] containing the values
3603/// provided in the operands.
3604static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3605_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3,
3606 short __w2, short __w1, short __w0) {
3607 return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3,
3608 __w4, __w5, __w6, __w7};
3609}
3610
3611/// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with
3612/// the specified 8-bit integer values.
3613///
3614/// \headerfile <x86intrin.h>
3615///
3616/// This intrinsic is a utility function and does not correspond to a specific
3617/// instruction.
3618///
3619/// \param __b15
3620/// Initializes bits [127:120] of the destination vector.
3621/// \param __b14
3622/// Initializes bits [119:112] of the destination vector.
3623/// \param __b13
3624/// Initializes bits [111:104] of the destination vector.
3625/// \param __b12
3626/// Initializes bits [103:96] of the destination vector.
3627/// \param __b11
3628/// Initializes bits [95:88] of the destination vector.
3629/// \param __b10
3630/// Initializes bits [87:80] of the destination vector.
3631/// \param __b9
3632/// Initializes bits [79:72] of the destination vector.
3633/// \param __b8
3634/// Initializes bits [71:64] of the destination vector.
3635/// \param __b7
3636/// Initializes bits [63:56] of the destination vector.
3637/// \param __b6
3638/// Initializes bits [55:48] of the destination vector.
3639/// \param __b5
3640/// Initializes bits [47:40] of the destination vector.
3641/// \param __b4
3642/// Initializes bits [39:32] of the destination vector.
3643/// \param __b3
3644/// Initializes bits [31:24] of the destination vector.
3645/// \param __b2
3646/// Initializes bits [23:16] of the destination vector.
3647/// \param __b1
3648/// Initializes bits [15:8] of the destination vector.
3649/// \param __b0
3650/// Initializes bits [7:0] of the destination vector.
3651/// \returns An initialized 128-bit vector of [16 x i8] containing the values
3652/// provided in the operands.
3653static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3654_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11,
3655 char __b10, char __b9, char __b8, char __b7, char __b6, char __b5,
3656 char __b4, char __b3, char __b2, char __b1, char __b0) {
3657 return __extension__(__m128i)(__v16qi){
3658 __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7,
3659 __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15};
3660}
3661
3662/// Initializes both values in a 128-bit integer vector with the
3663/// specified 64-bit integer value.
3664///
3665/// \headerfile <x86intrin.h>
3666///
3667/// This intrinsic is a utility function and does not correspond to a specific
3668/// instruction.
3669///
3670/// \param __q
3671/// Integer value used to initialize the elements of the destination integer
3672/// vector.
3673/// \returns An initialized 128-bit integer vector of [2 x i64] with both
3674/// elements containing the value provided in the operand.
3675static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3676_mm_set1_epi64x(long long __q) {
3677 return _mm_set_epi64x(__q, __q);
3678}
3679
3680/// Initializes both values in a 128-bit vector of [2 x i64] with the
3681/// specified 64-bit value.
3682///
3683/// \headerfile <x86intrin.h>
3684///
3685/// This intrinsic is a utility function and does not correspond to a specific
3686/// instruction.
3687///
3688/// \param __q
3689/// A 64-bit value used to initialize the elements of the destination integer
3690/// vector.
3691/// \returns An initialized 128-bit vector of [2 x i64] with all elements
3692/// containing the value provided in the operand.
3693static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3694_mm_set1_epi64(__m64 __q) {
3695 return _mm_set_epi64(__q, __q);
3696}
3697
3698/// Initializes all values in a 128-bit vector of [4 x i32] with the
3699/// specified 32-bit value.
3700///
3701/// \headerfile <x86intrin.h>
3702///
3703/// This intrinsic is a utility function and does not correspond to a specific
3704/// instruction.
3705///
3706/// \param __i
3707/// A 32-bit value used to initialize the elements of the destination integer
3708/// vector.
3709/// \returns An initialized 128-bit vector of [4 x i32] with all elements
3710/// containing the value provided in the operand.
3711static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i) {
3712 return _mm_set_epi32(__i, __i, __i, __i);
3713}
3714
3715/// Initializes all values in a 128-bit vector of [8 x i16] with the
3716/// specified 16-bit value.
3717///
3718/// \headerfile <x86intrin.h>
3719///
3720/// This intrinsic is a utility function and does not correspond to a specific
3721/// instruction.
3722///
3723/// \param __w
3724/// A 16-bit value used to initialize the elements of the destination integer
3725/// vector.
3726/// \returns An initialized 128-bit vector of [8 x i16] with all elements
3727/// containing the value provided in the operand.
3728static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3729_mm_set1_epi16(short __w) {
3730 return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);
3731}
3732
3733/// Initializes all values in a 128-bit vector of [16 x i8] with the
3734/// specified 8-bit value.
3735///
3736/// \headerfile <x86intrin.h>
3737///
3738/// This intrinsic is a utility function and does not correspond to a specific
3739/// instruction.
3740///
3741/// \param __b
3742/// An 8-bit value used to initialize the elements of the destination integer
3743/// vector.
3744/// \returns An initialized 128-bit vector of [16 x i8] with all elements
3745/// containing the value provided in the operand.
3746static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b) {
3747 return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
3748 __b, __b, __b, __b, __b);
3749}
3750
3751/// Constructs a 128-bit integer vector, initialized in reverse order
3752/// with the specified 64-bit integral values.
3753///
3754/// \headerfile <x86intrin.h>
3755///
3756/// This intrinsic does not correspond to a specific instruction.
3757///
3758/// \param __q0
3759/// A 64-bit integral value used to initialize the lower 64 bits of the
3760/// result.
3761/// \param __q1
3762/// A 64-bit integral value used to initialize the upper 64 bits of the
3763/// result.
3764/// \returns An initialized 128-bit integer vector.
3765static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3766_mm_setr_epi64(__m64 __q0, __m64 __q1) {
3767 return _mm_set_epi64(__q1, __q0);
3768}
3769
3770/// Constructs a 128-bit integer vector, initialized in reverse order
3771/// with the specified 32-bit integral values.
3772///
3773/// \headerfile <x86intrin.h>
3774///
3775/// This intrinsic is a utility function and does not correspond to a specific
3776/// instruction.
3777///
3778/// \param __i0
3779/// A 32-bit integral value used to initialize bits [31:0] of the result.
3780/// \param __i1
3781/// A 32-bit integral value used to initialize bits [63:32] of the result.
3782/// \param __i2
3783/// A 32-bit integral value used to initialize bits [95:64] of the result.
3784/// \param __i3
3785/// A 32-bit integral value used to initialize bits [127:96] of the result.
3786/// \returns An initialized 128-bit integer vector.
3787static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3788_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) {
3789 return _mm_set_epi32(__i3, __i2, __i1, __i0);
3790}
3791
3792/// Constructs a 128-bit integer vector, initialized in reverse order
3793/// with the specified 16-bit integral values.
3794///
3795/// \headerfile <x86intrin.h>
3796///
3797/// This intrinsic is a utility function and does not correspond to a specific
3798/// instruction.
3799///
3800/// \param __w0
3801/// A 16-bit integral value used to initialize bits [15:0] of the result.
3802/// \param __w1
3803/// A 16-bit integral value used to initialize bits [31:16] of the result.
3804/// \param __w2
3805/// A 16-bit integral value used to initialize bits [47:32] of the result.
3806/// \param __w3
3807/// A 16-bit integral value used to initialize bits [63:48] of the result.
3808/// \param __w4
3809/// A 16-bit integral value used to initialize bits [79:64] of the result.
3810/// \param __w5
3811/// A 16-bit integral value used to initialize bits [95:80] of the result.
3812/// \param __w6
3813/// A 16-bit integral value used to initialize bits [111:96] of the result.
3814/// \param __w7
3815/// A 16-bit integral value used to initialize bits [127:112] of the result.
3816/// \returns An initialized 128-bit integer vector.
3817static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3818_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4,
3819 short __w5, short __w6, short __w7) {
3820 return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);
3821}
3822
3823/// Constructs a 128-bit integer vector, initialized in reverse order
3824/// with the specified 8-bit integral values.
3825///
3826/// \headerfile <x86intrin.h>
3827///
3828/// This intrinsic is a utility function and does not correspond to a specific
3829/// instruction.
3830///
3831/// \param __b0
3832/// An 8-bit integral value used to initialize bits [7:0] of the result.
3833/// \param __b1
3834/// An 8-bit integral value used to initialize bits [15:8] of the result.
3835/// \param __b2
3836/// An 8-bit integral value used to initialize bits [23:16] of the result.
3837/// \param __b3
3838/// An 8-bit integral value used to initialize bits [31:24] of the result.
3839/// \param __b4
3840/// An 8-bit integral value used to initialize bits [39:32] of the result.
3841/// \param __b5
3842/// An 8-bit integral value used to initialize bits [47:40] of the result.
3843/// \param __b6
3844/// An 8-bit integral value used to initialize bits [55:48] of the result.
3845/// \param __b7
3846/// An 8-bit integral value used to initialize bits [63:56] of the result.
3847/// \param __b8
3848/// An 8-bit integral value used to initialize bits [71:64] of the result.
3849/// \param __b9
3850/// An 8-bit integral value used to initialize bits [79:72] of the result.
3851/// \param __b10
3852/// An 8-bit integral value used to initialize bits [87:80] of the result.
3853/// \param __b11
3854/// An 8-bit integral value used to initialize bits [95:88] of the result.
3855/// \param __b12
3856/// An 8-bit integral value used to initialize bits [103:96] of the result.
3857/// \param __b13
3858/// An 8-bit integral value used to initialize bits [111:104] of the result.
3859/// \param __b14
3860/// An 8-bit integral value used to initialize bits [119:112] of the result.
3861/// \param __b15
3862/// An 8-bit integral value used to initialize bits [127:120] of the result.
3863/// \returns An initialized 128-bit integer vector.
3864static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
3865_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
3866 char __b6, char __b7, char __b8, char __b9, char __b10,
3867 char __b11, char __b12, char __b13, char __b14, char __b15) {
3868 return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8,
3869 __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
3870}
3871
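The _mm_set_* constructors list arguments high element first, while the _mm_setr_* variants list them low element first; a small sketch showing that both calls build the same vector:

#include <emmintrin.h>

static void set_order_demo(void) {
  __m128i a = _mm_set_epi32(4, 3, 2, 1);  /* element 0 = 1 ... element 3 = 4  */
  __m128i b = _mm_setr_epi32(1, 2, 3, 4); /* same layout, reversed arg order  */
  (void)a;
  (void)b;
}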
3872/// Creates a 128-bit integer vector initialized to zero.
3873///
3874/// \headerfile <x86intrin.h>
3875///
3876/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
3877///
3878/// \returns An initialized 128-bit integer vector with all elements set to
3879/// zero.
3880static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void) {
3881 return __extension__(__m128i)(__v2di){0LL, 0LL};
3882}
3883
3884/// Stores a 128-bit integer vector to a memory location aligned on a
3885/// 128-bit boundary.
3886///
3887/// \headerfile <x86intrin.h>
3888///
3889/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.
3890///
3891/// \param __p
3892/// A pointer to an aligned memory location that will receive the integer
3893/// values.
3894/// \param __b
3895/// A 128-bit integer vector containing the values to be moved.
3896static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p,
3897 __m128i __b) {
3898 *__p = __b;
3899}
3900
3901/// Stores a 128-bit integer vector to an unaligned memory location.
3902///
3903/// \headerfile <x86intrin.h>
3904///
3905/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.
3906///
3907/// \param __p
3908/// A pointer to a memory location that will receive the integer values.
3909/// \param __b
3910/// A 128-bit integer vector containing the values to be moved.
3911static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i_u *__p,
3912 __m128i __b) {
3913 struct __storeu_si128 {
3914 __m128i_u __v;
3915 } __attribute__((__packed__, __may_alias__));
3916 ((struct __storeu_si128 *)__p)->__v = __b;
3917}
3918
3919/// Stores a 64-bit integer value from the low element of a 128-bit integer
3920/// vector.
3921///
3922/// \headerfile <x86intrin.h>
3923///
3924/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
3925///
3926/// \param __p
3927/// A pointer to a 64-bit memory location. The address of the memory
3928/// location does not have to be aligned.
3929/// \param __b
3930/// A 128-bit integer vector containing the value to be stored.
3931static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si64(void *__p,
3932 __m128i __b) {
3933 struct __storeu_si64 {
3934 long long __v;
3935 } __attribute__((__packed__, __may_alias__));
3936 ((struct __storeu_si64 *)__p)->__v = ((__v2di)__b)[0];
3937}
3938
3939/// Stores a 32-bit integer value from the low element of a 128-bit integer
3940/// vector.
3941///
3942/// \headerfile <x86intrin.h>
3943///
3944/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
3945///
3946/// \param __p
3947/// A pointer to a 32-bit memory location. The address of the memory
3948/// location does not have to be aligned.
3949/// \param __b
3950/// A 128-bit integer vector containing the value to be stored.
3951static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si32(void *__p,
3952 __m128i __b) {
3953 struct __storeu_si32 {
3954 int __v;
3955 } __attribute__((__packed__, __may_alias__));
3956 ((struct __storeu_si32 *)__p)->__v = ((__v4si)__b)[0];
3957}
3958
3959/// Stores a 16-bit integer value from the low element of a 128-bit integer
3960/// vector.
3961///
3962/// \headerfile <x86intrin.h>
3963///
3964/// This intrinsic does not correspond to a specific instruction.
3965///
3966/// \param __p
3967/// A pointer to a 16-bit memory location. The address of the memory
3968/// location does not have to be aligned.
3969/// \param __b
3970/// A 128-bit integer vector containing the value to be stored.
3971static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si16(void *__p,
3972 __m128i __b) {
3973 struct __storeu_si16 {
3974 short __v;
3975 } __attribute__((__packed__, __may_alias__));
3976 ((struct __storeu_si16 *)__p)->__v = ((__v8hi)__b)[0];
3977}
3978
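A sketch of the unaligned store family writing progressively narrower low lanes of one vector into a byte buffer (buffer layout is hypothetical; the packed, may_alias wrapper structs above are what make any address legal):

#include <emmintrin.h>

static void store_low_lanes(unsigned char *buf, __m128i v) {
  _mm_storeu_si128((__m128i_u *)buf, v); /* bytes 0..15: all 128 bits */
  _mm_storeu_si64(buf + 16, v);          /* bytes 16..23: low 64 bits */
  _mm_storeu_si32(buf + 24, v);          /* bytes 24..27: low 32 bits */
  _mm_storeu_si16(buf + 28, v);          /* bytes 28..29: low 16 bits */
}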
3979/// Moves bytes selected by the mask from the first operand to the
3980/// specified unaligned memory location. When a mask bit is 1, the
3981/// corresponding byte is written, otherwise it is not written.
3982///
3983/// To minimize caching, the data is flagged as non-temporal (unlikely to be
3984/// used again soon). Exception and trap behavior for elements not selected
3985/// for storage to memory are implementation dependent.
3986///
3987/// \headerfile <x86intrin.h>
3988///
3989/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>
3990/// instruction.
3991///
3992/// \param __d
3993/// A 128-bit integer vector containing the values to be moved.
3994/// \param __n
3995/// A 128-bit integer vector containing the mask. The most significant bit of
3996/// each byte is the write mask for the corresponding byte of \a __d.
3997/// \param __p
3998/// A pointer to an unaligned 128-bit memory location where the specified
3999/// values are moved.
4000static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d,
4001 __m128i __n,
4002 char *__p) {
4003 __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
4004}
4005
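A sketch of a masked store that writes only the low eight bytes of the data vector (destination and values are hypothetical):

#include <emmintrin.h>

static void masked_store_demo(char *dst, __m128i data) {
  /* A set top bit in a mask byte selects the corresponding data byte. */
  __m128i mask = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
                              (char)0x80, (char)0x80, (char)0x80, (char)0x80,
                              (char)0x80, (char)0x80, (char)0x80, (char)0x80);
  _mm_maskmoveu_si128(data, mask, dst);
}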
4006/// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to
4007/// a memory location.
4008///
4009/// \headerfile <x86intrin.h>
4010///
4011/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.
4012///
4013/// \param __p
4014/// A pointer to a 64-bit memory location that will receive the lower 64 bits
4015/// of the integer vector parameter.
4016/// \param __a
4017/// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the
4018/// value to be stored.
4019static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p,
4020 __m128i __a) {
4021 struct __mm_storel_epi64_struct {
4022 long long __u;
4023 } __attribute__((__packed__, __may_alias__));
4024 ((struct __mm_storel_epi64_struct *)__p)->__u = __a[0];
4025}
4026
4027/// Stores a 128-bit floating point vector of [2 x double] to a 128-bit
4028/// aligned memory location.
4029///
4030/// To minimize caching, the data is flagged as non-temporal (unlikely to be
4031/// used again soon).
4032///
4033/// \headerfile <x86intrin.h>
4034///
4035/// This intrinsic corresponds to the <c> VMOVNTPD / MOVNTPD </c> instruction.
4036///
4037/// \param __p
4038/// A pointer to the 128-bit aligned memory location used to store the value.
4039/// \param __a
4040/// A vector of [2 x double] containing the 64-bit values to be stored.
4041static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p,
4042 __m128d __a) {
4043 __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p);
4044}
4045
4046/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
4047///
4048/// To minimize caching, the data is flagged as non-temporal (unlikely to be
4049/// used again soon).
4050///
4051/// \headerfile <x86intrin.h>
4052///
4053/// This intrinsic corresponds to the <c> VMOVNTDQ / MOVNTDQ </c> instruction.
4054///
4055/// \param __p
4056/// A pointer to the 128-bit aligned memory location used to store the value.
4057/// \param __a
4058/// A 128-bit integer vector containing the values to be stored.
4059static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p,
4060 __m128i __a) {
4061 __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p);
4062}
4063
4064/// Stores a 32-bit integer value in the specified memory location.
4065///
4066/// To minimize caching, the data is flagged as non-temporal (unlikely to be
4067/// used again soon).
4068///
4069/// \headerfile <x86intrin.h>
4070///
4071/// This intrinsic corresponds to the <c> MOVNTI </c> instruction.
4072///
4073/// \param __p
4074/// A pointer to the 32-bit memory location used to store the value.
4075/// \param __a
4076/// A 32-bit integer containing the value to be stored.
4077static __inline__ void
4078 __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
4079 _mm_stream_si32(void *__p, int __a) {
4080 __builtin_ia32_movnti((int *)__p, __a);
4081}
4082
4083#ifdef __x86_64__
4084/// Stores a 64-bit integer value in the specified memory location.
4085///
4086/// To minimize caching, the data is flagged as non-temporal (unlikely to be
4087/// used again soon).
4088///
4089/// \headerfile <x86intrin.h>
4090///
4091/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.
4092///
4093/// \param __p
4094/// A pointer to the 64-bit memory location used to store the value.
4095/// \param __a
4096/// A 64-bit integer containing the value to be stored.
4097static __inline__ void
4098 __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
4099 _mm_stream_si64(void *__p, long long __a) {
4100 __builtin_ia32_movnti64((long long *)__p, __a);
4101}
4102#endif
4103
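Non-temporal stores bypass the cache, so a fence is normally needed before other code relies on the written data; a hedged sketch (sizes are hypothetical; _mm_sfence comes from xmmintrin.h, which this header includes):

#include <emmintrin.h>

static void stream_fill(__m128i *dst /* 16-byte aligned */, int n, __m128i v) {
  for (int i = 0; i < n; ++i)
    _mm_stream_si128(dst + i, v);
  _mm_sfence(); /* make the streaming stores globally visible */
}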
4104#if defined(__cplusplus)
4105extern "C" {
4106#endif
4107
4108/// The cache line containing \a __p is flushed and invalidated from all
4109/// caches in the coherency domain.
4110///
4111/// \headerfile <x86intrin.h>
4112///
4113/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.
4114///
4115/// \param __p
4116/// A pointer to the memory location used to identify the cache line to be
4117/// flushed.
4118void _mm_clflush(void const *__p);
4119
4120/// Forces strong memory ordering (serialization) between load
4121/// instructions preceding this instruction and load instructions following
4122/// this instruction, ensuring the system completes all previous loads before
4123/// executing subsequent loads.
4124///
4125/// \headerfile <x86intrin.h>
4126///
4127/// This intrinsic corresponds to the <c> LFENCE </c> instruction.
4128///
4129void _mm_lfence(void);
4130
4131/// Forces strong memory ordering (serialization) between load and store
4132/// instructions preceding this instruction and load and store instructions
4133/// following this instruction, ensuring that the system completes all
4134/// previous memory accesses before executing subsequent memory accesses.
4135///
4136/// \headerfile <x86intrin.h>
4137///
4138/// This intrinsic corresponds to the <c> MFENCE </c> instruction.
4139///
4140void _mm_mfence(void);
4141
4142#if defined(__cplusplus)
4143} // extern "C"
4144#endif
4145
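A typical pairing of the primitives above, flushing one line and then serializing so the flush completes before execution continues (pointer is hypothetical):

#include <emmintrin.h>

static void flush_line(const void *p) {
  _mm_clflush(p); /* evict the line containing p from all caches      */
  _mm_mfence();   /* order the flush against later memory accesses    */
}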
4146/// Converts, with saturation, 16-bit signed integers from both 128-bit integer
4147/// vector operands into 8-bit signed integers, and packs the results into
4148/// the destination.
4149///
4150/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
4151/// less than 0x80 are saturated to 0x80.
4152///
4153/// \headerfile <x86intrin.h>
4154///
4155/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.
4156///
4157/// \param __a
4158/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
4159/// written to the lower 64 bits of the result.
4160/// \param __b
4161/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
4162/// written to the higher 64 bits of the result.
4163/// \returns A 128-bit vector of [16 x i8] containing the converted values.
4164static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
4165 __m128i __b) {
4166 return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
4167}
4168
4169/// Converts, with saturation, 32-bit signed integers from both 128-bit integer
4170/// vector operands into 16-bit signed integers, and packs the results into
4171/// the destination.
4172///
4173/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
4174/// values less than 0x8000 are saturated to 0x8000.
4175///
4176/// \headerfile <x86intrin.h>
4177///
4178/// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction.
4179///
4180/// \param __a
4181/// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
4182/// are written to the lower 64 bits of the result.
4183/// \param __b
4184/// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
4185/// are written to the higher 64 bits of the result.
4186/// \returns A 128-bit vector of [8 x i16] containing the converted values.
4187static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
4188 __m128i __b) {
4189 return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
4190}
4191
4192/// Converts, with saturation, 16-bit signed integers from both 128-bit integer
4193/// vector operands into 8-bit unsigned integers, and packs the results into
4194/// the destination.
4195///
4196/// Values greater than 0xFF are saturated to 0xFF. Values less than 0x00
4197/// are saturated to 0x00.
4198///
4199/// \headerfile <x86intrin.h>
4200///
4201/// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction.
4202///
4203/// \param __a
4204/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
4205/// written to the lower 64 bits of the result.
4206/// \param __b
4207/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
4208/// written to the higher 64 bits of the result.
4209/// \returns A 128-bit vector of [16 x i8] containing the converted values.
4210static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
4211 __m128i __b) {
4212 return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
4213}
4214
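Saturation in the pack family, shown on hypothetical inputs:

#include <emmintrin.h>

static void pack_demo(void) {
  __m128i w = _mm_setr_epi16(300, -300, 42, 0, 0, 0, 0, 0);
  __m128i s = _mm_packs_epi16(w, w);  /* signed:   127, -128, 42, 0, ... */
  __m128i u = _mm_packus_epi16(w, w); /* unsigned: 255,    0, 42, 0, ... */
  (void)s;
  (void)u;
}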
4215/// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using
4216/// the immediate-value parameter as a selector.
4217///
4218/// \headerfile <x86intrin.h>
4219///
4220/// \code
4221/// __m128i _mm_extract_epi16(__m128i a, const int imm);
4222/// \endcode
4223///
4224/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
4225///
4226/// \param a
4227/// A 128-bit integer vector.
4228/// \param imm
4229/// An immediate value. Bits [2:0] select the values from \a a to be assigned
4230/// to bits [15:0] of the result. \n
4231/// 000: assign values from bits [15:0] of \a a. \n
4232/// 001: assign values from bits [31:16] of \a a. \n
4233/// 010: assign values from bits [47:32] of \a a. \n
4234/// 011: assign values from bits [63:48] of \a a. \n
4235/// 100: assign values from bits [79:64] of \a a. \n
4236/// 101: assign values from bits [95:80] of \a a. \n
4237/// 110: assign values from bits [111:96] of \a a. \n
4238/// 111: assign values from bits [127:112] of \a a.
4239/// \returns An integer, whose lower 16 bits are selected from the 128-bit
4240/// integer vector parameter and the remaining bits are assigned zeros.
4241#define _mm_extract_epi16(a, imm)                                          \
4242 ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
4243 (int)(imm)))
4244
4245/// Constructs a 128-bit integer vector by first making a copy of the
4246/// 128-bit integer vector parameter, and then inserting the lower 16 bits
4247/// of an integer parameter into an offset specified by the immediate-value
4248/// parameter.
4249///
4250/// \headerfile <x86intrin.h>
4251///
4252/// \code
4253/// __m128i _mm_insert_epi16(__m128i a, int b, const int imm);
4254/// \endcode
4255///
4256/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
4257///
4258/// \param a
4259/// A 128-bit integer vector of [8 x i16]. This vector is copied to the
4260/// result and then one of the eight elements in the result is replaced by
4261/// the lower 16 bits of \a b.
4262/// \param b
4263/// An integer. The lower 16 bits of this parameter are written to the
4264/// result beginning at an offset specified by \a imm.
4265/// \param imm
4266/// An immediate value specifying the bit offset in the result at which the
4267/// lower 16 bits of \a b are written.
4268/// \returns A 128-bit integer vector containing the constructed values.
4269#define _mm_insert_epi16(a, b, imm)                                        \
4270 ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
4271 (int)(imm)))
4272
4273/// Copies the values of the most significant bits from each 8-bit
4274/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
4275/// value, zero-extends the value, and writes it to the destination.
4276///
4277/// \headerfile <x86intrin.h>
4278///
4279/// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.
4280///
4281/// \param __a
4282/// A 128-bit integer vector containing the values with bits to be extracted.
4283/// \returns The most significant bits from each 8-bit element in \a __a,
4284/// written to bits [15:0]. The other bits are assigned zeros.
4285static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
4286 return __builtin_ia32_pmovmskb128((__v16qi)__a);
4287}
4288
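A common movemask idiom: a branch-free test for a zero byte anywhere in the vector (the basis of SSE2 strlen-style scans):

#include <emmintrin.h>

static int has_zero_byte(__m128i v) {
  __m128i eq = _mm_cmpeq_epi8(v, _mm_setzero_si128()); /* 0xFF where byte == 0 */
  return _mm_movemask_epi8(eq) != 0;                   /* any mask bit set?    */
}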
4289/// Constructs a 128-bit integer vector by shuffling four 32-bit
4290/// elements of a 128-bit integer vector parameter, using the immediate-value
4291/// parameter as a specifier.
4292///
4293/// \headerfile <x86intrin.h>
4294///
4295/// \code
4296/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);
4297/// \endcode
4298///
4299/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
4300///
4301/// \param a
4302/// A 128-bit integer vector containing the values to be copied.
4303/// \param imm
4304/// An immediate value containing an 8-bit value specifying which elements to
4305/// copy from a. The destinations within the 128-bit destination are assigned
4306/// values as follows: \n
4307/// Bits [1:0] are used to assign values to bits [31:0] of the result. \n
4308/// Bits [3:2] are used to assign values to bits [63:32] of the result. \n
4309/// Bits [5:4] are used to assign values to bits [95:64] of the result. \n
4310/// Bits [7:6] are used to assign values to bits [127:96] of the result. \n
4311/// Bit value assignments: \n
4312/// 00: assign values from bits [31:0] of \a a. \n
4313/// 01: assign values from bits [63:32] of \a a. \n
4314/// 10: assign values from bits [95:64] of \a a. \n
4315/// 11: assign values from bits [127:96] of \a a. \n
4316/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
4317/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
4318/// <c>[b6, b4, b2, b0]</c>.
4319/// \returns A 128-bit integer vector containing the shuffled values.
4320#define _mm_shuffle_epi32(a, imm)                                          \
4321 ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))
4322
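A sketch of the _MM_SHUFFLE note above: broadcasting element 0 across all four lanes (the macro is provided by xmmintrin.h):

#include <emmintrin.h>

static __m128i broadcast_lane0(__m128i v) {
  return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 0, 0));
}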
4323/// Constructs a 128-bit integer vector by shuffling four lower 16-bit
4324/// elements of a 128-bit integer vector of [8 x i16], using the immediate
4325/// value parameter as a specifier.
4326///
4327/// \headerfile <x86intrin.h>
4328///
4329/// \code
4330/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);
4331/// \endcode
4332///
4333/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
4334///
4335/// \param a
4336/// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits
4337/// [127:64] of the result.
4338/// \param imm
4339/// An 8-bit immediate value specifying which elements to copy from \a a. \n
4340/// Bits[1:0] are used to assign values to bits [15:0] of the result. \n
4341/// Bits[3:2] are used to assign values to bits [31:16] of the result. \n
4342/// Bits[5:4] are used to assign values to bits [47:32] of the result. \n
4343/// Bits[7:6] are used to assign values to bits [63:48] of the result. \n
4344/// Bit value assignments: \n
4345/// 00: assign values from bits [15:0] of \a a. \n
4346/// 01: assign values from bits [31:16] of \a a. \n
4347/// 10: assign values from bits [47:32] of \a a. \n
4348/// 11: assign values from bits [63:48] of \a a. \n
4349/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
4350/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
4351/// <c>[b6, b4, b2, b0]</c>.
4352/// \returns A 128-bit integer vector containing the shuffled values.
4353#define _mm_shufflelo_epi16(a, imm)                                        \
4354 ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))
4355
4356/// Constructs a 128-bit integer vector by shuffling four upper 16-bit
4357/// elements of a 128-bit integer vector of [8 x i16], using the immediate
4358/// value parameter as a specifier.
4359///
4360/// \headerfile <x86intrin.h>
4361///
4362/// \code
4363/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);
4364/// \endcode
4365///
4366/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.
4367///
4368/// \param a
4369/// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits
4370/// [63:0] of the result.
4371/// \param imm
4372/// An 8-bit immediate value specifying which elements to copy from \a a. \n
4373/// Bits[1:0] are used to assign values to bits [79:64] of the result. \n
4374/// Bits[3:2] are used to assign values to bits [95:80] of the result. \n
4375/// Bits[5:4] are used to assign values to bits [111:96] of the result. \n
4376/// Bits[7:6] are used to assign values to bits [127:112] of the result. \n
4377/// Bit value assignments: \n
4378/// 00: assign values from bits [79:64] of \a a. \n
4379/// 01: assign values from bits [95:80] of \a a. \n
4380/// 10: assign values from bits [111:96] of \a a. \n
4381/// 11: assign values from bits [127:112] of \a a. \n
4382/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
4383/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
4384/// <c>[b6, b4, b2, b0]</c>.
4385/// \returns A 128-bit integer vector containing the shuffled values.
4386#define _mm_shufflehi_epi16(a, imm)                                        \
4387 ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))
4388
4389/// Unpacks the high-order (index 8-15) values from two 128-bit vectors
4390/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
4391///
4392/// \headerfile <x86intrin.h>
4393///
4394/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>
4395/// instruction.
4396///
4397/// \param __a
4398/// A 128-bit vector of [16 x i8].
4399/// Bits [71:64] are written to bits [7:0] of the result. \n
4400/// Bits [79:72] are written to bits [23:16] of the result. \n
4401/// Bits [87:80] are written to bits [39:32] of the result. \n
4402/// Bits [95:88] are written to bits [55:48] of the result. \n
4403/// Bits [103:96] are written to bits [71:64] of the result. \n
4404/// Bits [111:104] are written to bits [87:80] of the result. \n
4405/// Bits [119:112] are written to bits [103:96] of the result. \n
4406/// Bits [127:120] are written to bits [119:112] of the result.
4407/// \param __b
4408/// A 128-bit vector of [16 x i8]. \n
4409/// Bits [71:64] are written to bits [15:8] of the result. \n
4410/// Bits [79:72] are written to bits [31:24] of the result. \n
4411/// Bits [87:80] are written to bits [47:40] of the result. \n
4412/// Bits [95:88] are written to bits [63:56] of the result. \n
4413/// Bits [103:96] are written to bits [79:72] of the result. \n
4414/// Bits [111:104] are written to bits [95:88] of the result. \n
4415/// Bits [119:112] are written to bits [111:104] of the result. \n
4416/// Bits [127:120] are written to bits [127:120] of the result.
4417/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
4418static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a,
4419 __m128i __b) {
4420 return (__m128i)__builtin_shufflevector(
4421 (__v16qi)__a, (__v16qi)__b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11,
4422 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15);
4423}
4424
4425/// Unpacks the high-order (index 4-7) values from two 128-bit vectors of
4426/// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].
4427///
4428/// \headerfile <x86intrin.h>
4429///
4430/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>
4431/// instruction.
4432///
4433/// \param __a
4434/// A 128-bit vector of [8 x i16].
4435/// Bits [79:64] are written to bits [15:0] of the result. \n
4436/// Bits [95:80] are written to bits [47:32] of the result. \n
4437/// Bits [111:96] are written to bits [79:64] of the result. \n
4438/// Bits [127:112] are written to bits [111:96] of the result.
4439/// \param __b
4440/// A 128-bit vector of [8 x i16].
4441/// Bits [79:64] are written to bits [31:16] of the result. \n
4442/// Bits [95:80] are written to bits [63:48] of the result. \n
4443/// Bits [111:96] are written to bits [95:80] of the result. \n
4444/// Bits [127:112] are written to bits [127:112] of the result.
4445/// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
4446static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a,
4447 __m128i __b) {
4448 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8 + 4, 5,
4449 8 + 5, 6, 8 + 6, 7, 8 + 7);
4450}
4451
4452/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
4453/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
4454///
4455/// \headerfile <x86intrin.h>
4456///
4457/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>
4458/// instruction.
4459///
4460/// \param __a
4461/// A 128-bit vector of [4 x i32]. \n
4462/// Bits [95:64] are written to bits [31:0] of the destination. \n
4463/// Bits [127:96] are written to bits [95:64] of the destination.
4464/// \param __b
4465/// A 128-bit vector of [4 x i32]. \n
4466/// Bits [95:64] are written to bits [63:32] of the destination. \n
4467/// Bits [127:96] are written to bits [127:96] of the destination.
4468/// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
4469static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a,
4470 __m128i __b) {
4471 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3,
4472 4 + 3);
4473}
4474
4475/// Unpacks the high-order 64-bit elements from two 128-bit vectors of
4476/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
4477///
4478/// \headerfile <x86intrin.h>
4479///
4480/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>
4481/// instruction.
4482///
4483/// \param __a
4484/// A 128-bit vector of [2 x i64]. \n
4485/// Bits [127:64] are written to bits [63:0] of the destination.
4486/// \param __b
4487/// A 128-bit vector of [2 x i64]. \n
4488/// Bits [127:64] are written to bits [127:64] of the destination.
4489/// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
4490static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a,
4491 __m128i __b) {
4492 return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1);
4493}
4494
4495/// Unpacks the low-order (index 0-7) values from two 128-bit vectors of
4496/// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
4497///
4498/// \headerfile <x86intrin.h>
4499///
4500/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>
4501/// instruction.
4502///
4503/// \param __a
4504/// A 128-bit vector of [16 x i8]. \n
4505/// Bits [7:0] are written to bits [7:0] of the result. \n
4506/// Bits [15:8] are written to bits [23:16] of the result. \n
4507/// Bits [23:16] are written to bits [39:32] of the result. \n
4508/// Bits [31:24] are written to bits [55:48] of the result. \n
4509/// Bits [39:32] are written to bits [71:64] of the result. \n
4510/// Bits [47:40] are written to bits [87:80] of the result. \n
4511/// Bits [55:48] are written to bits [103:96] of the result. \n
4512/// Bits [63:56] are written to bits [119:112] of the result.
4513/// \param __b
4514/// A 128-bit vector of [16 x i8].
4515/// Bits [7:0] are written to bits [15:8] of the result. \n
4516/// Bits [15:8] are written to bits [31:24] of the result. \n
4517/// Bits [23:16] are written to bits [47:40] of the result. \n
4518/// Bits [31:24] are written to bits [63:56] of the result. \n
4519/// Bits [39:32] are written to bits [79:72] of the result. \n
4520/// Bits [47:40] are written to bits [95:88] of the result. \n
4521/// Bits [55:48] are written to bits [111:104] of the result. \n
4522/// Bits [63:56] are written to bits [127:120] of the result.
4523/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
4524static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a,
4525 __m128i __b) {
4526 return (__m128i)__builtin_shufflevector(
4527 (__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
4528 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7);
4529}
4530
4531/// Unpacks the low-order (index 0-3) values from each of the two 128-bit
4532/// vectors of [8 x i16] and interleaves them into a 128-bit vector of
4533/// [8 x i16].
4534///
4535/// \headerfile <x86intrin.h>
4536///
4537/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>
4538/// instruction.
4539///
4540/// \param __a
4541/// A 128-bit vector of [8 x i16].
4542/// Bits [15:0] are written to bits [15:0] of the result. \n
4543/// Bits [31:16] are written to bits [47:32] of the result. \n
4544/// Bits [47:32] are written to bits [79:64] of the result. \n
4545/// Bits [63:48] are written to bits [111:96] of the result.
4546/// \param __b
4547/// A 128-bit vector of [8 x i16].
4548/// Bits [15:0] are written to bits [31:16] of the result. \n
4549/// Bits [31:16] are written to bits [63:48] of the result. \n
4550/// Bits [47:32] are written to bits [95:80] of the result. \n
4551/// Bits [63:48] are written to bits [127:112] of the result.
4552/// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
4553static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a,
4554 __m128i __b) {
4555 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8 + 0, 1,
4556 8 + 1, 2, 8 + 2, 3, 8 + 3);
4557}
4558
4559/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of
4560/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
4561///
4562/// \headerfile <x86intrin.h>
4563///
4564/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>
4565/// instruction.
4566///
4567/// \param __a
4568/// A 128-bit vector of [4 x i32]. \n
4569/// Bits [31:0] are written to bits [31:0] of the destination. \n
4570/// Bits [63:32] are written to bits [95:64] of the destination.
4571/// \param __b
4572/// A 128-bit vector of [4 x i32]. \n
4573/// Bits [31:0] are written to bits [63:32] of the destination. \n
4574/// Bits [63:32] are written to bits [127:96] of the destination.
4575/// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
4576static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a,
4577 __m128i __b) {
4578 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4 + 0, 1,
4579 4 + 1);
4580}
4581
4582/// Unpacks the low-order 64-bit elements from two 128-bit vectors of
4583/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
4584///
4585/// \headerfile <x86intrin.h>
4586///
4587/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
4588/// instruction.
4589///
4590/// \param __a
4591/// A 128-bit vector of [2 x i64]. \n
4592/// Bits [63:0] are written to bits [63:0] of the destination.
4593/// \param __b
4594/// A 128-bit vector of [2 x i64]. \n
4595/// Bits [63:0] are written to bits [127:64] of the destination.
4596/// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
4597static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a,
4598 __m128i __b) {
4599 return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0);
4600}
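/* Usage sketch (illustrative names): _mm_unpacklo_epi64 concatenates the
 * low quadwords of two vectors, e.g. recombining results computed
 * separately for two halves of a pixel pair. */
static __inline__ __m128i concat_low_quadwords(__m128i lo, __m128i hi) {
  /* result[63:0] = lo[63:0], result[127:64] = hi[63:0] */
  return _mm_unpacklo_epi64(lo, hi);
}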
4601
4602/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
4603/// integer.
4604///
4605/// \headerfile <x86intrin.h>
4606///
4607/// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.
4608///
4609/// \param __a
4610/// A 128-bit integer vector operand. The lower 64 bits are moved to the
4611/// destination.
4612/// \returns A 64-bit integer containing the lower 64 bits of the parameter.
4613static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
4614_mm_movepi64_pi64(__m128i __a) {
4615 return (__m64)__a[0];
4616}
4617
4618/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
4619/// upper bits.
4620///
4621/// \headerfile <x86intrin.h>
4622///
4623/// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.
4624///
4625/// \param __a
4626/// A 64-bit value.
4627/// \returns A 128-bit integer vector. The lower 64 bits contain the value from
4628/// the operand. The upper 64 bits are assigned zeros.
4629static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4630_mm_movpi64_epi64(__m64 __a) {
4631 return __builtin_shufflevector((__v1di)__a, _mm_setzero_si64(), 0, 1);
4632}
4633
4634/// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit
4635/// integer vector, zeroing the upper bits.
4636///
4637/// \headerfile <x86intrin.h>
4638///
4639/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
4640///
4641/// \param __a
4642/// A 128-bit integer vector operand. The lower 64 bits are moved to the
4643/// destination.
4644/// \returns A 128-bit integer vector. The lower 64 bits contain the value from
4645/// the operand. The upper 64 bits are assigned zeros.
4646static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4647_mm_move_epi64(__m128i __a) {
4648 return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2);
4649}
4650
4651/// Unpacks the high-order 64-bit elements from two 128-bit vectors of
4652/// [2 x double] and interleaves them into a 128-bit vector of [2 x
4653/// double].
4654///
4655/// \headerfile <x86intrin.h>
4656///
4657/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.
4658///
4659/// \param __a
4660/// A 128-bit vector of [2 x double]. \n
4661/// Bits [127:64] are written to bits [63:0] of the destination.
4662/// \param __b
4663/// A 128-bit vector of [2 x double]. \n
4664/// Bits [127:64] are written to bits [127:64] of the destination.
4665/// \returns A 128-bit vector of [2 x double] containing the interleaved values.
4666static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
4667_mm_unpackhi_pd(__m128d __a, __m128d __b) {
4668 return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2 + 1);
4669}
4670
4671/// Unpacks the low-order 64-bit elements from two 128-bit vectors
4672/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
4673/// double].
4674///
4675/// \headerfile <x86intrin.h>
4676///
4677/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
4678///
4679/// \param __a
4680/// A 128-bit vector of [2 x double]. \n
4681/// Bits [63:0] are written to bits [63:0] of the destination.
4682/// \param __b
4683/// A 128-bit vector of [2 x double]. \n
4684/// Bits [63:0] are written to bits [127:64] of the destination.
4685/// \returns A 128-bit vector of [2 x double] containing the interleaved values.
4686static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
4687_mm_unpacklo_pd(__m128d __a, __m128d __b) {
4688 return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2 + 0);
4689}
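/* Usage sketch (illustrative): taken together, the pd unpacks transpose
 * a 2x2 block of doubles. With row0 = [a b] and row1 = [c d] (low lane
 * listed first), the columns come out as [a c] and [b d]. */
static __inline__ void transpose_2x2_pd(__m128d row0, __m128d row1,
                                        __m128d *col0, __m128d *col1) {
  *col0 = _mm_unpacklo_pd(row0, row1); /* [a c] */
  *col1 = _mm_unpackhi_pd(row0, row1); /* [b d] */
}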
4690
4691/// Extracts the sign bits of the double-precision values in the 128-bit
4692/// vector of [2 x double], zero-extends the value, and writes it to the
4693/// low-order bits of the destination.
4694///
4695/// \headerfile <x86intrin.h>
4696///
4697/// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction.
4698///
4699/// \param __a
4700/// A 128-bit vector of [2 x double] containing the values with sign bits to
4701/// be extracted.
4702/// \returns The sign bits from each of the double-precision elements in \a __a,
4703/// written to bits [1:0]. The remaining bits are assigned values of zero.
4704static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) {
4705 return __builtin_ia32_movmskpd((__v2df)__a);
4706}
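/* Usage sketch (illustrative): because the two sign bits land in bits
 * [1:0] of an ordinary int, a scalar test can dispatch on lane signs
 * without extracting the doubles. */
static __inline__ int both_lanes_negative(__m128d v) {
  return _mm_movemask_pd(v) == 0x3; /* bit 0 = low lane, bit 1 = high lane */
}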
4707
4708/// Constructs a 128-bit floating-point vector of [2 x double] from two
4709/// 128-bit vector parameters of [2 x double], using the immediate-value
4710/// parameter as a specifier.
4711///
4712/// \headerfile <x86intrin.h>
4713///
4714/// \code
4715/// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i);
4716/// \endcode
4717///
4718/// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction.
4719///
4720/// \param a
4721/// A 128-bit vector of [2 x double].
4722/// \param b
4723/// A 128-bit vector of [2 x double].
4724/// \param i
4725/// An 8-bit immediate value. The least significant two bits specify which
4726/// elements to copy from \a a and \a b: \n
4727/// Bit[0] = 0: lower element of \a a copied to lower element of result. \n
4728/// Bit[0] = 1: upper element of \a a copied to lower element of result. \n
4729/// Bit[1] = 0: lower element of \a b copied to upper element of result. \n
4730/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
4731/// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro.
4732/// <c>_MM_SHUFFLE2(b1, b0)</c> can create a 2-bit mask of the form
4733/// <c>[b1, b0]</c>.
4734/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
4735#define _mm_shuffle_pd(a, b, i) \
4736 ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
4737 (int)(i)))
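/* Usage sketch (illustrative): swapping the two halves of a vector from
 * user code, where _MM_SHUFFLE2 (defined at the end of this header) is
 * available. _MM_SHUFFLE2(0, 1) builds the immediate 0b01: bit 0 = 1
 * picks the upper element of the first operand for the low lane,
 * bit 1 = 0 picks the lower element of the second operand for the high
 * lane; passing the same vector twice makes this a swap. */
static __inline__ __m128d swap_pd(__m128d v) {
  return _mm_shuffle_pd(v, v, _MM_SHUFFLE2(0, 1));
}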
4738
4739/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
4740/// floating-point vector of [4 x float].
4741///
4742/// \headerfile <x86intrin.h>
4743///
4744/// This intrinsic has no corresponding instruction.
4745///
4746/// \param __a
4747/// A 128-bit floating-point vector of [2 x double].
4748/// \returns A 128-bit floating-point vector of [4 x float] containing the same
4749/// bitwise pattern as the parameter.
4750static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
4751_mm_castpd_ps(__m128d __a) {
4752 return (__m128)__a;
4753}
4754
4755/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
4756/// integer vector.
4757///
4758/// \headerfile <x86intrin.h>
4759///
4760/// This intrinsic has no corresponding instruction.
4761///
4762/// \param __a
4763/// A 128-bit floating-point vector of [2 x double].
4764/// \returns A 128-bit integer vector containing the same bitwise pattern as the
4765/// parameter.
4766static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4767_mm_castpd_si128(__m128d __a) {
4768 return (__m128i)__a;
4769}
4770
4771/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
4772/// floating-point vector of [2 x double].
4773///
4774/// \headerfile <x86intrin.h>
4775///
4776/// This intrinsic has no corresponding instruction.
4777///
4778/// \param __a
4779/// A 128-bit floating-point vector of [4 x float].
4780/// \returns A 128-bit floating-point vector of [2 x double] containing the same
4781/// bitwise pattern as the parameter.
4782static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
4783_mm_castps_pd(__m128 __a) {
4784 return (__m128d)__a;
4785}
4786
4787/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
4788/// integer vector.
4789///
4790/// \headerfile <x86intrin.h>
4791///
4792/// This intrinsic has no corresponding instruction.
4793///
4794/// \param __a
4795/// A 128-bit floating-point vector of [4 x float].
4796/// \returns A 128-bit integer vector containing the same bitwise pattern as the
4797/// parameter.
4798static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4799_mm_castps_si128(__m128 __a) {
4800 return (__m128i)__a;
4801}
4802
4803/// Casts a 128-bit integer vector into a 128-bit floating-point vector
4804/// of [4 x float].
4805///
4806/// \headerfile <x86intrin.h>
4807///
4808/// This intrinsic has no corresponding instruction.
4809///
4810/// \param __a
4811/// A 128-bit integer vector.
4812/// \returns A 128-bit floating-point vector of [4 x float] containing the same
4813/// bitwise pattern as the parameter.
4814static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
4815_mm_castsi128_ps(__m128i __a) {
4816 return (__m128)__a;
4817}
4818
4819/// Casts a 128-bit integer vector into a 128-bit floating-point vector
4820/// of [2 x double].
4821///
4822/// \headerfile <x86intrin.h>
4823///
4824/// This intrinsic has no corresponding instruction.
4825///
4826/// \param __a
4827/// A 128-bit integer vector.
4828/// \returns A 128-bit floating-point vector of [2 x double] containing the same
4829/// bitwise pattern as the parameter.
4830static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
4831_mm_castsi128_pd(__m128i __a) {
4832 return (__m128d)__a;
4833}
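/* Usage sketch (illustrative): the casts compile to nothing, which is
 * what makes integer bit-twiddling on floating-point data cheap. For
 * example, clearing the sign bits of [2 x double] with an integer AND: */
static __inline__ __m128d abs_pd(__m128d v) {
  const __m128i sign_clear = _mm_set1_epi64x(0x7FFFFFFFFFFFFFFFLL);
  return _mm_castsi128_pd(_mm_and_si128(_mm_castpd_si128(v), sign_clear));
}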
4834
4835/// Compares each of the corresponding double-precision values of two
4836/// 128-bit vectors of [2 x double], using the operation specified by the
4837/// immediate integer operand.
4838///
4839/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
4840/// If either value in a comparison is NaN, comparisons that are ordered
4841/// return false, and comparisons that are unordered return true.
4842///
4843/// \headerfile <x86intrin.h>
4844///
4845/// \code
4846/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
4847/// \endcode
4848///
4849/// This intrinsic corresponds to the <c> (V)CMPPD </c> instruction.
4850///
4851/// \param a
4852/// A 128-bit vector of [2 x double].
4853/// \param b
4854/// A 128-bit vector of [2 x double].
4855/// \param c
4856/// An immediate integer operand, with bits [4:0] specifying which comparison
4857/// operation to use: \n
4858/// 0x00: Equal (ordered, non-signaling) \n
4859/// 0x01: Less-than (ordered, signaling) \n
4860/// 0x02: Less-than-or-equal (ordered, signaling) \n
4861/// 0x03: Unordered (non-signaling) \n
4862/// 0x04: Not-equal (unordered, non-signaling) \n
4863/// 0x05: Not-less-than (unordered, signaling) \n
4864/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
4865/// 0x07: Ordered (non-signaling) \n
4866/// \returns A 128-bit vector of [2 x double] containing the comparison results.
4867#define _mm_cmp_pd(a, b, c) \
4868 ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
4869 (c)))
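/* Usage sketch (illustrative): the all-ones/all-zeros lanes returned by
 * the comparison act as a select mask. Predicate 0x01 is ordered
 * less-than; SSE4.1 code would use a blend instead of and/andnot/or. */
static __inline__ __m128d select_min_pd(__m128d a, __m128d b) {
  __m128d lt = _mm_cmp_pd(a, b, 0x01); /* per lane: a < b ? ~0 : 0 */
  return _mm_or_pd(_mm_and_pd(lt, a), _mm_andnot_pd(lt, b));
}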
4870
4871/// Compares the lower (scalar) double-precision values of two 128-bit
4872/// vectors of [2 x double], using the operation specified by the
4873/// immediate integer operand.
4874///
4875/// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
4876/// If either value in a comparison is NaN, comparisons that are ordered
4877/// return false, and comparisons that are unordered return true.
4878///
4879/// \headerfile <x86intrin.h>
4880///
4881/// \code
4882/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
4883/// \endcode
4884///
4885/// This intrinsic corresponds to the <c> (V)CMPSD </c> instruction.
4886///
4887/// \param a
4888/// A 128-bit vector of [2 x double].
4889/// \param b
4890/// A 128-bit vector of [2 x double].
4891/// \param c
4892/// An immediate integer operand, with bits [4:0] specifying which comparison
4893/// operation to use: \n
4894/// 0x00: Equal (ordered, non-signaling) \n
4895/// 0x01: Less-than (ordered, signaling) \n
4896/// 0x02: Less-than-or-equal (ordered, signaling) \n
4897/// 0x03: Unordered (non-signaling) \n
4898/// 0x04: Not-equal (unordered, non-signaling) \n
4899/// 0x05: Not-less-than (unordered, signaling) \n
4900/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
4901/// 0x07: Ordered (non-signaling) \n
4902/// \returns A 128-bit vector of [2 x double] whose lower 64 bits hold the comparison result and whose upper 64 bits are copied from \a a.
4903#define _mm_cmp_sd(a, b, c) \
4904 ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
4905 (c)))
4906
4907#if defined(__cplusplus)
4908extern "C" {
4909#endif
4910
4911/// Indicates that a spin loop is being executed for the purposes of
4912/// optimizing power consumption during the loop.
4913///
4914/// \headerfile <x86intrin.h>
4915///
4916/// This intrinsic corresponds to the <c> PAUSE </c> instruction.
4917///
4918void _mm_pause(void);
4919
4920#if defined(__cplusplus)
4921} // extern "C"
4922#endif
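/* Usage sketch (illustrative; the flag parameter is hypothetical):
 * _mm_pause belongs inside a spin-wait, where it lowers power draw and
 * frees pipeline resources for the sibling hyperthread. */
static __inline__ void spin_until_set(volatile int *flag) {
  while (!*flag)
    _mm_pause();
}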
4923
4924#undef __anyext128
4925#undef __trunc64
4926#undef __DEFAULT_FN_ATTRS
4927#undef __DEFAULT_FN_ATTRS_CONSTEXPR
4928
4929#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
4930
4931#define _MM_DENORMALS_ZERO_ON (0x0040U)
4932#define _MM_DENORMALS_ZERO_OFF (0x0000U)
4933
4934#define _MM_DENORMALS_ZERO_MASK (0x0040U)
4935
4936#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
4937#define _MM_SET_DENORMALS_ZERO_MODE(x) \
4938 (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
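/* Usage sketch (illustrative): saving, enabling, and restoring
 * denormals-are-zero around a flush-sensitive loop. _mm_getcsr and
 * _mm_setcsr come from xmmintrin.h, which this header includes. */
static __inline__ unsigned int enable_daz(void) {
  unsigned int saved = _MM_GET_DENORMALS_ZERO_MODE();
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
  return saved; /* either _MM_DENORMALS_ZERO_ON or _MM_DENORMALS_ZERO_OFF */
}
static __inline__ void restore_daz(unsigned int saved) {
  _mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | saved);
}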
4939
4940#endif /* __EMMINTRIN_H */