Bug Summary

File: /usr/lib/llvm-18/lib/clang/18/include/emmintrin.h
Warning: line 3373, column 10
Access to field '__v' results in a dereference of a null pointer (loaded from variable '__p')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name pixman-sse2.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/gfx/cairo/libpixman/src -fcoverage-compilation-dir=/var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/gfx/cairo/libpixman/src -resource-dir /usr/lib/llvm-18/lib/clang/18 -include /var/lib/jenkins/workspace/firefox-scan-build/config/gcc_hidden.h -include /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/mozilla-config.h -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/system_wrappers -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D DEBUG=1 -D HAVE_PTHREADS -D PACKAGE=mozpixman -D USE_X86_MMX -D USE_SSE2 -D USE_SSSE3 -D MOZ_HAS_MOZGLUE -D MOZILLA_INTERNAL_API -D IMPL_LIBXUL -D STATIC_EXPORTABLE_JS_API -I /var/lib/jenkins/workspace/firefox-scan-build/gfx/cairo/libpixman/src -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/gfx/cairo/libpixman/src -I 
/var/lib/jenkins/workspace/firefox-scan-build/gfx/cairo/cairo/src -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /var/lib/jenkins/workspace/firefox-scan-build/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/llvm-18/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-error=tautological-type-limit-compare -Wno-range-loop-analysis -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-unknown-warning-option -Wno-address -Wno-braced-scalar-init -Wno-missing-field-initializers -Wno-sign-compare -Wno-incompatible-pointer-types -Wno-unused -Wno-incompatible-pointer-types -Wno-tautological-compare -Wno-tautological-constant-out-of-range-compare -std=gnu99 -ferror-limit 19 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2024-05-16-034744-15991-1 -x c /var/lib/jenkins/workspace/firefox-scan-build/gfx/cairo/libpixman/src/pixman-sse2.c

/var/lib/jenkins/workspace/firefox-scan-build/gfx/cairo/libpixman/src/pixman-sse2.c

1/*
2 * Copyright © 2008 Rodrigo Kumpera
3 * Copyright © 2008 André Tupinambá
4 *
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of Red Hat not be used in advertising or
10 * publicity pertaining to distribution of the software without specific,
11 * written prior permission. Red Hat makes no representations about the
12 * suitability of this software for any purpose. It is provided "as is"
13 * without express or implied warranty.
14 *
15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
22 * SOFTWARE.
23 *
24 * Author: Rodrigo Kumpera (kumpera@gmail.com)
25 * André Tupinambá (andrelrt@gmail.com)
26 *
27 * Based on work by Owen Taylor and Søren Sandmann
28 */
29#ifdef HAVE_CONFIG_H
30#include <pixman-config.h>
31#endif
32
33/* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */
34#define PSHUFD_IS_FAST0 0
35
36#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
37#include <emmintrin.h> /* for SSE2 intrinsics */
38#include "pixman-private.h"
39#include "pixman-combine32.h"
40#include "pixman-inlines.h"
41
42static __m128i mask_0080;
43static __m128i mask_00ff;
44static __m128i mask_0101;
45static __m128i mask_ffff;
46static __m128i mask_ff000000;
47static __m128i mask_alpha;
48
49static __m128i mask_565_r;
50static __m128i mask_565_g1, mask_565_g2;
51static __m128i mask_565_b;
52static __m128i mask_red;
53static __m128i mask_green;
54static __m128i mask_blue;
55
56static __m128i mask_565_fix_rb;
57static __m128i mask_565_fix_g;
58
59static __m128i mask_565_rb;
60static __m128i mask_565_pack_multiplier;
61
62static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
63unpack_32_1x128 (uint32_t data)
64{
65 return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
66}
67
68static force_inline__inline__ __attribute__ ((__always_inline__)) void
69unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi)
70{
71 *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
72 *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
73}
74
75static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
76unpack_565_to_8888 (__m128i lo)
77{
78 __m128i r, g, b, rb, t;
79
80 r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
81 g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
82 b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);
83
84 rb = _mm_or_si128 (r, b);
85 t = _mm_and_si128 (rb, mask_565_fix_rb);
86 t = _mm_srli_epi32 (t, 5);
87 rb = _mm_or_si128 (rb, t);
88
89 t = _mm_and_si128 (g, mask_565_fix_g);
90 t = _mm_srli_epi32 (t, 6);
91 g = _mm_or_si128 (g, t);
92
93 return _mm_or_si128 (rb, g);
94}
95
96static force_inline__inline__ __attribute__ ((__always_inline__)) void
97unpack_565_128_4x128 (__m128i data,
98 __m128i* data0,
99 __m128i* data1,
100 __m128i* data2,
101 __m128i* data3)
102{
103 __m128i lo, hi;
104
105 lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
106 hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
107
108 lo = unpack_565_to_8888 (lo);
109 hi = unpack_565_to_8888 (hi);
110
111 unpack_128_2x128 (lo, data0, data1);
112 unpack_128_2x128 (hi, data2, data3);
113}
114
115static force_inline__inline__ __attribute__ ((__always_inline__)) uint16_t
116pack_565_32_16 (uint32_t pixel)
117{
118 return (uint16_t) (((pixel >> 8) & 0xf800) |
119 ((pixel >> 5) & 0x07e0) |
120 ((pixel >> 3) & 0x001f));
121}
122
123static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
124pack_2x128_128 (__m128i lo, __m128i hi)
125{
126 return _mm_packus_epi16 (lo, hi);
127}
128
129static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
130pack_565_2packedx128_128 (__m128i lo, __m128i hi)
131{
132 __m128i rb0 = _mm_and_si128 (lo, mask_565_rb);
133 __m128i rb1 = _mm_and_si128 (hi, mask_565_rb);
134
135 __m128i t0 = _mm_madd_epi16 (rb0, mask_565_pack_multiplier);
136 __m128i t1 = _mm_madd_epi16 (rb1, mask_565_pack_multiplier);
137
138 __m128i g0 = _mm_and_si128 (lo, mask_green);
139 __m128i g1 = _mm_and_si128 (hi, mask_green);
140
141 t0 = _mm_or_si128 (t0, g0);
142 t1 = _mm_or_si128 (t1, g1);
143
144 /* Simulates _mm_packus_epi32 */
145 t0 = _mm_slli_epi32 (t0, 16 - 5);
146 t1 = _mm_slli_epi32 (t1, 16 - 5);
147 t0 = _mm_srai_epi32 (t0, 16);
148 t1 = _mm_srai_epi32 (t1, 16);
149 return _mm_packs_epi32 (t0, t1);
150}
151
152static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
153pack_565_2x128_128 (__m128i lo, __m128i hi)
154{
155 __m128i data;
156 __m128i r, g1, g2, b;
157
158 data = pack_2x128_128 (lo, hi);
159
160 r = _mm_and_si128 (data, mask_565_r);
161 g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
162 g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
163 b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);
164
165 return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
166}
167
168static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
169pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
170{
171 return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
172 pack_565_2x128_128 (*xmm2, *xmm3));
173}
174
175static force_inline__inline__ __attribute__ ((__always_inline__)) int
176is_opaque (__m128i x)
177{
178 __m128i ffs = _mm_cmpeq_epi8 (x, x);
179
180 return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
181}
182
183static force_inline__inline__ __attribute__ ((__always_inline__)) int
184is_zero (__m128i x)
185{
186 return _mm_movemask_epi8 (
187 _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
188}
189
190static force_inline__inline__ __attribute__ ((__always_inline__)) int
191is_transparent (__m128i x)
192{
193 return (_mm_movemask_epi8 (
194 _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
195}
196
197static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
198expand_pixel_32_1x128 (uint32_t data)
199{
200 return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0))((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(unpack_32_1x128
(data)), (int)((((1) << 6) | ((0) << 4) | ((1) <<
2) | (0)))))
;
201}
202
203static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
204expand_alpha_1x128 (__m128i data)
205{
206 return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(((__m128i)
__builtin_ia32_pshuflw((__v8hi)(__m128i)(data), (int)((((3) <<
6) | ((3) << 4) | ((3) << 2) | (3)))))), (int)((
((3) << 6) | ((3) << 4) | ((3) << 2) | (3))
)))
207 _MM_SHUFFLE (3, 3, 3, 3)),((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(((__m128i)
__builtin_ia32_pshuflw((__v8hi)(__m128i)(data), (int)((((3) <<
6) | ((3) << 4) | ((3) << 2) | (3)))))), (int)((
((3) << 6) | ((3) << 4) | ((3) << 2) | (3))
)))
208 _MM_SHUFFLE (3, 3, 3, 3))((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(((__m128i)
__builtin_ia32_pshuflw((__v8hi)(__m128i)(data), (int)((((3) <<
6) | ((3) << 4) | ((3) << 2) | (3)))))), (int)((
((3) << 6) | ((3) << 4) | ((3) << 2) | (3))
)))
;
209}
210
211static force_inline__inline__ __attribute__ ((__always_inline__)) void
212expand_alpha_2x128 (__m128i data_lo,
213 __m128i data_hi,
214 __m128i* alpha_lo,
215 __m128i* alpha_hi)
216{
217 __m128i lo, hi;
218
219 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3))((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(data_lo), (
int)((((3) << 6) | ((3) << 4) | ((3) << 2) |
(3)))))
;
220 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3))((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(data_hi), (
int)((((3) << 6) | ((3) << 4) | ((3) << 2) |
(3)))))
;
221
222 *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3))((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(lo), (int)
((((3) << 6) | ((3) << 4) | ((3) << 2) | (3
)))))
;
223 *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3))((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(hi), (int)
((((3) << 6) | ((3) << 4) | ((3) << 2) | (3
)))))
;
224}
225
226static force_inline__inline__ __attribute__ ((__always_inline__)) void
227expand_alpha_rev_2x128 (__m128i data_lo,
228 __m128i data_hi,
229 __m128i* alpha_lo,
230 __m128i* alpha_hi)
231{
232 __m128i lo, hi;
233
234 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0))((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(data_lo), (
int)((((0) << 6) | ((0) << 4) | ((0) << 2) |
(0)))))
;
235 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0))((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(data_hi), (
int)((((0) << 6) | ((0) << 4) | ((0) << 2) |
(0)))))
;
236 *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0))((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(lo), (int)
((((0) << 6) | ((0) << 4) | ((0) << 2) | (0
)))))
;
237 *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0))((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(hi), (int)
((((0) << 6) | ((0) << 4) | ((0) << 2) | (0
)))))
;
238}
239
240static force_inline__inline__ __attribute__ ((__always_inline__)) void
241pix_multiply_2x128 (__m128i* data_lo,
242 __m128i* data_hi,
243 __m128i* alpha_lo,
244 __m128i* alpha_hi,
245 __m128i* ret_lo,
246 __m128i* ret_hi)
247{
248 __m128i lo, hi;
249
250 lo = _mm_mullo_epi16 (*data_lo, *alpha_lo);
251 hi = _mm_mullo_epi16 (*data_hi, *alpha_hi);
252 lo = _mm_adds_epu16 (lo, mask_0080);
253 hi = _mm_adds_epu16 (hi, mask_0080);
254 *ret_lo = _mm_mulhi_epu16 (lo, mask_0101);
255 *ret_hi = _mm_mulhi_epu16 (hi, mask_0101);
256}
257
258static force_inline__inline__ __attribute__ ((__always_inline__)) void
259pix_add_multiply_2x128 (__m128i* src_lo,
260 __m128i* src_hi,
261 __m128i* alpha_dst_lo,
262 __m128i* alpha_dst_hi,
263 __m128i* dst_lo,
264 __m128i* dst_hi,
265 __m128i* alpha_src_lo,
266 __m128i* alpha_src_hi,
267 __m128i* ret_lo,
268 __m128i* ret_hi)
269{
270 __m128i t1_lo, t1_hi;
271 __m128i t2_lo, t2_hi;
272
273 pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi);
274 pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi);
275
276 *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo);
277 *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi);
278}
279
280static force_inline__inline__ __attribute__ ((__always_inline__)) void
281negate_2x128 (__m128i data_lo,
282 __m128i data_hi,
283 __m128i* neg_lo,
284 __m128i* neg_hi)
285{
286 *neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
287 *neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
288}
289
290static force_inline__inline__ __attribute__ ((__always_inline__)) void
291invert_colors_2x128 (__m128i data_lo,
292 __m128i data_hi,
293 __m128i* inv_lo,
294 __m128i* inv_hi)
295{
296 __m128i lo, hi;
297
298 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2))((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(data_lo), (
int)((((3) << 6) | ((0) << 4) | ((1) << 2) |
(2)))))
;
299 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2))((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(data_hi), (
int)((((3) << 6) | ((0) << 4) | ((1) << 2) |
(2)))))
;
300 *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2))((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(lo), (int)
((((3) << 6) | ((0) << 4) | ((1) << 2) | (2
)))))
;
301 *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2))((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(hi), (int)
((((3) << 6) | ((0) << 4) | ((1) << 2) | (2
)))))
;
302}
303
304static force_inline__inline__ __attribute__ ((__always_inline__)) void
305over_2x128 (__m128i* src_lo,
306 __m128i* src_hi,
307 __m128i* alpha_lo,
308 __m128i* alpha_hi,
309 __m128i* dst_lo,
310 __m128i* dst_hi)
311{
312 __m128i t1, t2;
313
314 negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
315
316 pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
317
318 *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo);
319 *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi);
320}
321
322static force_inline__inline__ __attribute__ ((__always_inline__)) void
323over_rev_non_pre_2x128 (__m128i src_lo,
324 __m128i src_hi,
325 __m128i* dst_lo,
326 __m128i* dst_hi)
327{
328 __m128i lo, hi;
329 __m128i alpha_lo, alpha_hi;
330
331 expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi);
332
333 lo = _mm_or_si128 (alpha_lo, mask_alpha);
334 hi = _mm_or_si128 (alpha_hi, mask_alpha);
335
336 invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi);
337
338 pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi);
339
340 over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi);
341}
342
343static force_inline__inline__ __attribute__ ((__always_inline__)) void
344in_over_2x128 (__m128i* src_lo,
345 __m128i* src_hi,
346 __m128i* alpha_lo,
347 __m128i* alpha_hi,
348 __m128i* mask_lo,
349 __m128i* mask_hi,
350 __m128i* dst_lo,
351 __m128i* dst_hi)
352{
353 __m128i s_lo, s_hi;
354 __m128i a_lo, a_hi;
355
356 pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
357 pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
358
359 over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
360}
361
362/* load 4 pixels from a 16-byte boundary aligned address */
363static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
364load_128_aligned (__m128i* src)
365{
366 return _mm_load_si128 (src);
367}
368
369/* load 4 pixels from a unaligned address */
370static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
371load_128_unaligned (const __m128i* src)
372{
373 return _mm_loadu_si128 (src);
12
Passing null pointer value via 1st parameter '__p'
13
Calling '_mm_loadu_si128'
374}
375
376/* save 4 pixels on a 16-byte boundary aligned address */
377static force_inline__inline__ __attribute__ ((__always_inline__)) void
378save_128_aligned (__m128i* dst,
379 __m128i data)
380{
381 _mm_store_si128 (dst, data);
382}
383
384static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
385load_32_1x128 (uint32_t data)
386{
387 return _mm_cvtsi32_si128 (data);
388}
389
390static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
391expand_alpha_rev_1x128 (__m128i data)
392{
393 return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0))((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(data), (int
)((((0) << 6) | ((0) << 4) | ((0) << 2) | (
0)))))
;
394}
395
396static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
397expand_pixel_8_1x128 (uint8_t data)
398{
399 return _mm_shufflelo_epi16 (((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(unpack_32_1x128
((uint32_t)data)), (int)((((0) << 6) | ((0) << 4
) | ((0) << 2) | (0)))))
400 unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0))((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(unpack_32_1x128
((uint32_t)data)), (int)((((0) << 6) | ((0) << 4
) | ((0) << 2) | (0)))))
;
401}
402
403static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
404pix_multiply_1x128 (__m128i data,
405 __m128i alpha)
406{
407 return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha),
408 mask_0080),
409 mask_0101);
410}
411
412static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
413pix_add_multiply_1x128 (__m128i* src,
414 __m128i* alpha_dst,
415 __m128i* dst,
416 __m128i* alpha_src)
417{
418 __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst);
419 __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src);
420
421 return _mm_adds_epu8 (t1, t2);
422}
423
424static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
425negate_1x128 (__m128i data)
426{
427 return _mm_xor_si128 (data, mask_00ff);
428}
429
430static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
431invert_colors_1x128 (__m128i data)
432{
433 return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2))((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(data), (int
)((((3) << 6) | ((0) << 4) | ((1) << 2) | (
2)))))
;
434}
435
436static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
437over_1x128 (__m128i src, __m128i alpha, __m128i dst)
438{
439 return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
440}
441
442static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
443in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
444{
445 return over_1x128 (pix_multiply_1x128 (*src, *mask),
446 pix_multiply_1x128 (*alpha, *mask),
447 *dst);
448}
449
450static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
451over_rev_non_pre_1x128 (__m128i src, __m128i dst)
452{
453 __m128i alpha = expand_alpha_1x128 (src);
454
455 return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src),
456 _mm_or_si128 (alpha, mask_alpha)),
457 alpha,
458 dst);
459}
460
461static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
462pack_1x128_32 (__m128i data)
463{
464 return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
465}
466
467static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
468expand565_16_1x128 (uint16_t pixel)
469{
470 __m128i m = _mm_cvtsi32_si128 (pixel);
471
472 m = unpack_565_to_8888 (m);
473
474 return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
475}
476
477static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
478core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
479{
480 uint8_t a;
481 __m128i xmms;
482
483 a = src >> 24;
484
485 if (a == 0xff)
486 {
487 return src;
488 }
489 else if (src)
490 {
491 xmms = unpack_32_1x128 (src);
492 return pack_1x128_32 (
493 over_1x128 (xmms, expand_alpha_1x128 (xmms),
494 unpack_32_1x128 (dst)));
495 }
496
497 return dst;
498}
499
500static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
501combine1 (const uint32_t *ps, const uint32_t *pm)
502{
503 uint32_t s;
504 memcpy(&s, ps, sizeof(uint32_t));
505
506 if (pm)
507 {
508 __m128i ms, mm;
509
510 mm = unpack_32_1x128 (*pm);
511 mm = expand_alpha_1x128 (mm);
512
513 ms = unpack_32_1x128 (s);
514 ms = pix_multiply_1x128 (ms, mm);
515
516 s = pack_1x128_32 (ms);
517 }
518
519 return s;
520}
521
522static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
523combine4 (const __m128i *ps, const __m128i *pm)
524{
525 __m128i xmm_src_lo, xmm_src_hi;
526 __m128i xmm_msk_lo, xmm_msk_hi;
527 __m128i s;
528
529 if (pm)
530 {
531 xmm_msk_lo = load_128_unaligned (pm);
532
533 if (is_transparent (xmm_msk_lo))
534 return _mm_setzero_si128 ();
535 }
536
537 s = load_128_unaligned (ps);
538
539 if (pm)
540 {
541 unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
542 unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);
543
544 expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);
545
546 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
547 &xmm_msk_lo, &xmm_msk_hi,
548 &xmm_src_lo, &xmm_src_hi);
549
550 s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
551 }
552
553 return s;
554}
555
556static force_inline__inline__ __attribute__ ((__always_inline__)) void
557core_combine_over_u_sse2_mask (uint32_t * pd,
558 const uint32_t* ps,
559 const uint32_t* pm,
560 int w)
561{
562 uint32_t s, d;
563
564 /* Align dst on a 16-byte boundary */
565 while (w && ((uintptr_t)pd & 15))
1
Assuming 'w' is not equal to 0
2
Loop condition is true. Entering loop body
6
Assuming 'w' is not equal to 0
7
Loop condition is false. Execution continues on line 578
566 {
567 d = *pd;
568 s = combine1 (ps, pm);
569
570 if (s)
3
Assuming 's' is 0
4
Taking false branch
571 *pd = core_combine_over_u_pixel_sse2 (s, d);
572 pd++;
573 ps++;
574 pm++;
5
Null pointer value stored to 'pm'
575 w--;
576 }
577
578 while (w >= 4)
8
Assuming 'w' is >= 4
9
Loop condition is true. Entering loop body
579 {
580 __m128i mask = load_128_unaligned ((__m128i *)pm);
10
Passing null pointer value via 1st parameter 'src'
11
Calling 'load_128_unaligned'
581
582 if (!is_zero (mask))
583 {
584 __m128i src;
585 __m128i src_hi, src_lo;
586 __m128i mask_hi, mask_lo;
587 __m128i alpha_hi, alpha_lo;
588
589 src = load_128_unaligned ((__m128i *)ps);
590
591 if (is_opaque (_mm_and_si128 (src, mask)))
592 {
593 save_128_aligned ((__m128i *)pd, src);
594 }
595 else
596 {
597 __m128i dst = load_128_aligned ((__m128i *)pd);
598 __m128i dst_hi, dst_lo;
599
600 unpack_128_2x128 (mask, &mask_lo, &mask_hi);
601 unpack_128_2x128 (src, &src_lo, &src_hi);
602
603 expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi);
604 pix_multiply_2x128 (&src_lo, &src_hi,
605 &mask_lo, &mask_hi,
606 &src_lo, &src_hi);
607
608 unpack_128_2x128 (dst, &dst_lo, &dst_hi);
609
610 expand_alpha_2x128 (src_lo, src_hi,
611 &alpha_lo, &alpha_hi);
612
613 over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
614 &dst_lo, &dst_hi);
615
616 save_128_aligned (
617 (__m128i *)pd,
618 pack_2x128_128 (dst_lo, dst_hi));
619 }
620 }
621
622 pm += 4;
623 ps += 4;
624 pd += 4;
625 w -= 4;
626 }
627 while (w)
628 {
629 d = *pd;
630 s = combine1 (ps, pm);
631
632 if (s)
633 *pd = core_combine_over_u_pixel_sse2 (s, d);
634 pd++;
635 ps++;
636 pm++;
637
638 w--;
639 }
640}
641
642static force_inline__inline__ __attribute__ ((__always_inline__)) void
643core_combine_over_u_sse2_no_mask (uint32_t * pd,
644 const uint32_t* ps,
645 int w)
646{
647 uint32_t s, d;
648
649 /* Align dst on a 16-byte boundary */
650 while (w && ((uintptr_t)pd & 15))
651 {
652 d = *pd;
653 s = *ps;
654
655 if (s)
656 *pd = core_combine_over_u_pixel_sse2 (s, d);
657 pd++;
658 ps++;
659 w--;
660 }
661
662 while (w >= 4)
663 {
664 __m128i src;
665 __m128i src_hi, src_lo, dst_hi, dst_lo;
666 __m128i alpha_hi, alpha_lo;
667
668 src = load_128_unaligned ((__m128i *)ps);
669
670 if (!is_zero (src))
671 {
672 if (is_opaque (src))
673 {
674 save_128_aligned ((__m128i *)pd, src);
675 }
676 else
677 {
678 __m128i dst = load_128_aligned ((__m128i *)pd);
679
680 unpack_128_2x128 (src, &src_lo, &src_hi);
681 unpack_128_2x128 (dst, &dst_lo, &dst_hi);
682
683 expand_alpha_2x128 (src_lo, src_hi,
684 &alpha_lo, &alpha_hi);
685 over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
686 &dst_lo, &dst_hi);
687
688 save_128_aligned (
689 (__m128i *)pd,
690 pack_2x128_128 (dst_lo, dst_hi));
691 }
692 }
693
694 ps += 4;
695 pd += 4;
696 w -= 4;
697 }
698 while (w)
699 {
700 d = *pd;
701 s = *ps;
702
703 if (s)
704 *pd = core_combine_over_u_pixel_sse2 (s, d);
705 pd++;
706 ps++;
707
708 w--;
709 }
710}
711
712static force_inline__inline__ __attribute__ ((__always_inline__)) void
713sse2_combine_over_u (pixman_implementation_t *imp,
714 pixman_op_t op,
715 uint32_t * pd,
716 const uint32_t * ps,
717 const uint32_t * pm,
718 int w)
719{
720 if (pm)
721 core_combine_over_u_sse2_mask (pd, ps, pm, w);
722 else
723 core_combine_over_u_sse2_no_mask (pd, ps, w);
724}
725
726static void
727sse2_combine_over_reverse_u (pixman_implementation_t *imp,
728 pixman_op_t op,
729 uint32_t * pd,
730 const uint32_t * ps,
731 const uint32_t * pm,
732 int w)
733{
734 uint32_t s, d;
735
736 __m128i xmm_dst_lo, xmm_dst_hi;
737 __m128i xmm_src_lo, xmm_src_hi;
738 __m128i xmm_alpha_lo, xmm_alpha_hi;
739
740 /* Align dst on a 16-byte boundary */
741 while (w &&
742 ((uintptr_t)pd & 15))
743 {
744 d = *pd;
745 s = combine1 (ps, pm);
746
747 *pd++ = core_combine_over_u_pixel_sse2 (d, s);
748 w--;
749 ps++;
750 if (pm)
751 pm++;
752 }
753
754 while (w >= 4)
755 {
756 /* I'm loading unaligned because I'm not sure
757 * about the address alignment.
758 */
759 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
760 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
761
762 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
763 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
764
765 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
766 &xmm_alpha_lo, &xmm_alpha_hi);
767
768 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
769 &xmm_alpha_lo, &xmm_alpha_hi,
770 &xmm_src_lo, &xmm_src_hi);
771
772 /* rebuid the 4 pixel data and save*/
773 save_128_aligned ((__m128i*)pd,
774 pack_2x128_128 (xmm_src_lo, xmm_src_hi));
775
776 w -= 4;
777 ps += 4;
778 pd += 4;
779
780 if (pm)
781 pm += 4;
782 }
783
784 while (w)
785 {
786 d = *pd;
787 s = combine1 (ps, pm);
788
789 *pd++ = core_combine_over_u_pixel_sse2 (d, s);
790 ps++;
791 w--;
792 if (pm)
793 pm++;
794 }
795}
796
797static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
798core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst)
799{
800 uint32_t maska = src >> 24;
801
802 if (maska == 0)
803 {
804 return 0;
805 }
806 else if (maska != 0xff)
807 {
808 return pack_1x128_32 (
809 pix_multiply_1x128 (unpack_32_1x128 (dst),
810 expand_alpha_1x128 (unpack_32_1x128 (src))));
811 }
812
813 return dst;
814}
815
816static void
817sse2_combine_in_u (pixman_implementation_t *imp,
818 pixman_op_t op,
819 uint32_t * pd,
820 const uint32_t * ps,
821 const uint32_t * pm,
822 int w)
823{
824 uint32_t s, d;
825
826 __m128i xmm_src_lo, xmm_src_hi;
827 __m128i xmm_dst_lo, xmm_dst_hi;
828
829 while (w && ((uintptr_t)pd & 15))
830 {
831 s = combine1 (ps, pm);
832 d = *pd;
833
834 *pd++ = core_combine_in_u_pixel_sse2 (d, s);
835 w--;
836 ps++;
837 if (pm)
838 pm++;
839 }
840
841 while (w >= 4)
842 {
843 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
844 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
845
846 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
847 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
848
849 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
850 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
851 &xmm_dst_lo, &xmm_dst_hi,
852 &xmm_dst_lo, &xmm_dst_hi);
853
854 save_128_aligned ((__m128i*)pd,
855 pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
856
857 ps += 4;
858 pd += 4;
859 w -= 4;
860 if (pm)
861 pm += 4;
862 }
863
864 while (w)
865 {
866 s = combine1 (ps, pm);
867 d = *pd;
868
869 *pd++ = core_combine_in_u_pixel_sse2 (d, s);
870 w--;
871 ps++;
872 if (pm)
873 pm++;
874 }
875}
876
877static void
878sse2_combine_in_reverse_u (pixman_implementation_t *imp,
879 pixman_op_t op,
880 uint32_t * pd,
881 const uint32_t * ps,
882 const uint32_t * pm,
883 int w)
884{
885 uint32_t s, d;
886
887 __m128i xmm_src_lo, xmm_src_hi;
888 __m128i xmm_dst_lo, xmm_dst_hi;
889
890 while (w && ((uintptr_t)pd & 15))
891 {
892 s = combine1 (ps, pm);
893 d = *pd;
894
895 *pd++ = core_combine_in_u_pixel_sse2 (s, d);
896 ps++;
897 w--;
898 if (pm)
899 pm++;
900 }
901
902 while (w >= 4)
903 {
904 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
905 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
906
907 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
908 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
909
910 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
911 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
912 &xmm_src_lo, &xmm_src_hi,
913 &xmm_dst_lo, &xmm_dst_hi);
914
915 save_128_aligned (
916 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
917
918 ps += 4;
919 pd += 4;
920 w -= 4;
921 if (pm)
922 pm += 4;
923 }
924
925 while (w)
926 {
927 s = combine1 (ps, pm);
928 d = *pd;
929
930 *pd++ = core_combine_in_u_pixel_sse2 (s, d);
931 w--;
932 ps++;
933 if (pm)
934 pm++;
935 }
936}
937
/*
 * OUT_REVERSE operator, unified alpha:
 * dst = dst * (1 - alpha(src))   (negate of expanded source alpha).
 * pm (mask) may be NULL.  Same three-phase layout as the other
 * combiners: align head, 4-pixel SSE2 body, scalar tail.
 */
938static void
939sse2_combine_out_reverse_u (pixman_implementation_t *imp,
940 pixman_op_t op,
941 uint32_t * pd,
942 const uint32_t * ps,
943 const uint32_t * pm,
944 int w)
945{
 /* Head: align pd to 16 bytes. */
946 while (w && ((uintptr_t)pd & 15))
947 {
948 uint32_t s = combine1 (ps, pm);
949 uint32_t d = *pd;
950
951 *pd++ = pack_1x128_32 (
952 pix_multiply_1x128 (
953 unpack_32_1x128 (d), negate_1x128 (
954 expand_alpha_1x128 (unpack_32_1x128 (s)))));
955
956 if (pm)
957 pm++;
958 ps++;
959 w--;
960 }
961
 /* SIMD body: 4 pixels per iteration. */
962 while (w >= 4)
963 {
964 __m128i xmm_src_lo, xmm_src_hi;
965 __m128i xmm_dst_lo, xmm_dst_hi;
966
967 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
968 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
969
970 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
971 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
972
973 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
974 negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
975
976 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
977 &xmm_src_lo, &xmm_src_hi,
978 &xmm_dst_lo, &xmm_dst_hi);
979
980 save_128_aligned (
981 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
982
983 ps += 4;
984 pd += 4;
985 if (pm)
986 pm += 4;
987
988 w -= 4;
989 }
990
 /* Tail: remaining 0..3 pixels. */
991 while (w)
992 {
993 uint32_t s = combine1 (ps, pm);
994 uint32_t d = *pd;
995
996 *pd++ = pack_1x128_32 (
997 pix_multiply_1x128 (
998 unpack_32_1x128 (d), negate_1x128 (
999 expand_alpha_1x128 (unpack_32_1x128 (s)))));
1000 ps++;
1001 if (pm)
1002 pm++;
1003 w--;
1004 }
1005}
1006
/*
 * OUT operator, unified alpha:
 * dst = src * (1 - alpha(dst))   (negate of expanded destination alpha).
 * pm (mask) may be NULL.  Align head / 4-pixel SSE2 body / scalar tail.
 */
1007static void
1008sse2_combine_out_u (pixman_implementation_t *imp,
1009 pixman_op_t op,
1010 uint32_t * pd,
1011 const uint32_t * ps,
1012 const uint32_t * pm,
1013 int w)
1014{
 /* Head: align pd to 16 bytes. */
1015 while (w && ((uintptr_t)pd & 15))
1016 {
1017 uint32_t s = combine1 (ps, pm);
1018 uint32_t d = *pd;
1019
1020 *pd++ = pack_1x128_32 (
1021 pix_multiply_1x128 (
1022 unpack_32_1x128 (s), negate_1x128 (
1023 expand_alpha_1x128 (unpack_32_1x128 (d)))));
1024 w--;
1025 ps++;
1026 if (pm)
1027 pm++;
1028 }
1029
 /* SIMD body: 4 pixels per iteration. */
1030 while (w >= 4)
1031 {
1032 __m128i xmm_src_lo, xmm_src_hi;
1033 __m128i xmm_dst_lo, xmm_dst_hi;
1034
1035 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
1036 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
1037
1038 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1039 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1040
1041 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1042 negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1043
1044 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1045 &xmm_dst_lo, &xmm_dst_hi,
1046 &xmm_dst_lo, &xmm_dst_hi);
1047
1048 save_128_aligned (
1049 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1050
1051 ps += 4;
1052 pd += 4;
1053 w -= 4;
1054 if (pm)
1055 pm += 4;
1056 }
1057
 /* Tail: remaining 0..3 pixels. */
1058 while (w)
1059 {
1060 uint32_t s = combine1 (ps, pm);
1061 uint32_t d = *pd;
1062
1063 *pd++ = pack_1x128_32 (
1064 pix_multiply_1x128 (
1065 unpack_32_1x128 (s), negate_1x128 (
1066 expand_alpha_1x128 (unpack_32_1x128 (d)))));
1067 w--;
1068 ps++;
1069 if (pm)
1070 pm++;
1071 }
1072}
1073
/*
 * Single-pixel ATOP (unified alpha):
 * result = src * alpha(dst) + dst * (1 - alpha(src)).
 */
1074static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1075core_combine_atop_u_pixel_sse2 (uint32_t src,
1076 uint32_t dst)
1077{
1078 __m128i s = unpack_32_1x128 (src);
1079 __m128i d = unpack_32_1x128 (dst);
1080
1081 __m128i sa = negate_1x128 (expand_alpha_1x128 (s));
1082 __m128i da = expand_alpha_1x128 (d);
1083
1084 return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
1085}
1086
/*
 * ATOP operator, unified alpha:
 * dst = src * alpha(dst) + dst * (1 - alpha(src)).
 * pm (mask) may be NULL.  Align head / 4-pixel SSE2 body / scalar tail.
 */
1087static void
1088sse2_combine_atop_u (pixman_implementation_t *imp,
1089 pixman_op_t op,
1090 uint32_t * pd,
1091 const uint32_t * ps,
1092 const uint32_t * pm,
1093 int w)
1094{
1095 uint32_t s, d;
1096
1097 __m128i xmm_src_lo, xmm_src_hi;
1098 __m128i xmm_dst_lo, xmm_dst_hi;
1099 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1100 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1101
 /* Head: align pd to 16 bytes. */
1102 while (w && ((uintptr_t)pd & 15))
1103 {
1104 s = combine1 (ps, pm);
1105 d = *pd;
1106
1107 *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
1108 w--;
1109 ps++;
1110 if (pm)
1111 pm++;
1112 }
1113
 /* SIMD body: 4 pixels per iteration. */
1114 while (w >= 4)
1115 {
1116 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
1117 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
1118
1119 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1120 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1121
1122 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1123 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1124 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1125 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1126
 /* ATOP uses (1 - alpha(src)) on the destination term. */
1127 negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
1128 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1129
1130 pix_add_multiply_2x128 (
1131 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1132 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1133 &xmm_dst_lo, &xmm_dst_hi);
1134
1135 save_128_aligned (
1136 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1137
1138 ps += 4;
1139 pd += 4;
1140 w -= 4;
1141 if (pm)
1142 pm += 4;
1143 }
1144
 /* Tail: remaining 0..3 pixels. */
1145 while (w)
1146 {
1147 s = combine1 (ps, pm);
1148 d = *pd;
1149
1150 *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
1151 w--;
1152 ps++;
1153 if (pm)
1154 pm++;
1155 }
1156}
1157
/*
 * Single-pixel ATOP_REVERSE (unified alpha):
 * result = src * (1 - alpha(dst)) + dst * alpha(src).
 */
1158static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1159core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
1160 uint32_t dst)
1161{
1162 __m128i s = unpack_32_1x128 (src);
1163 __m128i d = unpack_32_1x128 (dst);
1164
1165 __m128i sa = expand_alpha_1x128 (s);
1166 __m128i da = negate_1x128 (expand_alpha_1x128 (d));
1167
1168 return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
1169}
1170
/*
 * ATOP_REVERSE operator, unified alpha:
 * dst = src * (1 - alpha(dst)) + dst * alpha(src).
 * pm (mask) may be NULL.  Align head / 4-pixel SSE2 body / scalar tail.
 */
1171static void
1172sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
1173 pixman_op_t op,
1174 uint32_t * pd,
1175 const uint32_t * ps,
1176 const uint32_t * pm,
1177 int w)
1178{
1179 uint32_t s, d;
1180
1181 __m128i xmm_src_lo, xmm_src_hi;
1182 __m128i xmm_dst_lo, xmm_dst_hi;
1183 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1184 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1185
 /* Head: align pd to 16 bytes. */
1186 while (w && ((uintptr_t)pd & 15))
1187 {
1188 s = combine1 (ps, pm);
1189 d = *pd;
1190
1191 *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
1192 ps++;
1193 w--;
1194 if (pm)
1195 pm++;
1196 }
1197
 /* SIMD body: 4 pixels per iteration. */
1198 while (w >= 4)
1199 {
1200 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
1201 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
1202
1203 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1204 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1205
1206 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1207 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1208 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1209 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1210
 /* ATOP_REVERSE negates the destination alpha instead of the source. */
1211 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
1212 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1213
1214 pix_add_multiply_2x128 (
1215 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1216 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1217 &xmm_dst_lo, &xmm_dst_hi);
1218
1219 save_128_aligned (
1220 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1221
1222 ps += 4;
1223 pd += 4;
1224 w -= 4;
1225 if (pm)
1226 pm += 4;
1227 }
1228
 /* Tail: remaining 0..3 pixels. */
1229 while (w)
1230 {
1231 s = combine1 (ps, pm);
1232 d = *pd;
1233
1234 *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
1235 ps++;
1236 w--;
1237 if (pm)
1238 pm++;
1239 }
1240}
1241
/*
 * Single-pixel XOR (unified alpha):
 * result = src * (1 - alpha(dst)) + dst * (1 - alpha(src)).
 */
1242static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1243core_combine_xor_u_pixel_sse2 (uint32_t src,
1244 uint32_t dst)
1245{
1246 __m128i s = unpack_32_1x128 (src);
1247 __m128i d = unpack_32_1x128 (dst);
1248
1249 __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d));
1250 __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s));
1251
1252 return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
1253}
1254
/*
 * XOR operator, unified alpha:
 * dst = src * (1 - alpha(dst)) + dst * (1 - alpha(src)).
 * mask may be NULL.  Align head / 4-pixel SSE2 body / scalar tail.
 */
1255static void
1256sse2_combine_xor_u (pixman_implementation_t *imp,
1257 pixman_op_t op,
1258 uint32_t * dst,
1259 const uint32_t * src,
1260 const uint32_t * mask,
1261 int width)
1262{
1263 int w = width;
1264 uint32_t s, d;
1265 uint32_t* pd = dst;
1266 const uint32_t* ps = src;
1267 const uint32_t* pm = mask;
1268
1269 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
1270 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
1271 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1272 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1273
 /* Head: align pd to 16 bytes. */
1274 while (w && ((uintptr_t)pd & 15))
1275 {
1276 s = combine1 (ps, pm);
1277 d = *pd;
1278
1279 *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
1280 w--;
1281 ps++;
1282 if (pm)
1283 pm++;
1284 }
1285
 /* SIMD body: 4 pixels per iteration; both alphas are negated. */
1286 while (w >= 4)
1287 {
1288 xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
1289 xmm_dst = load_128_aligned ((__m128i*) pd);
1290
1291 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
1292 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
1293
1294 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1295 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1296 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1297 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1298
1299 negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
1300 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1301 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
1302 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1303
1304 pix_add_multiply_2x128 (
1305 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1306 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1307 &xmm_dst_lo, &xmm_dst_hi);
1308
1309 save_128_aligned (
1310 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1311
1312 ps += 4;
1313 pd += 4;
1314 w -= 4;
1315 if (pm)
1316 pm += 4;
1317 }
1318
 /* Tail: remaining 0..3 pixels. */
1319 while (w)
1320 {
1321 s = combine1 (ps, pm);
1322 d = *pd;
1323
1324 *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
1325 w--;
1326 ps++;
1327 if (pm)
1328 pm++;
1329 }
1330}
1331
/*
 * ADD operator, unified alpha: dst = saturated_add (src, dst),
 * per-byte with unsigned saturation (_mm_adds_epu8).
 * mask may be NULL.  Align head / 4-pixel SSE2 body / scalar tail.
 */
1332static force_inline__inline__ __attribute__ ((__always_inline__)) void
1333sse2_combine_add_u (pixman_implementation_t *imp,
1334 pixman_op_t op,
1335 uint32_t * dst,
1336 const uint32_t * src,
1337 const uint32_t * mask,
1338 int width)
1339{
1340 int w = width;
1341 uint32_t s, d;
1342 uint32_t* pd = dst;
1343 const uint32_t* ps = src;
1344 const uint32_t* pm = mask;
1345
 /* Head: align pd to 16 bytes, one pixel at a time. */
1346 while (w && (uintptr_t)pd & 15)
1347 {
1348 s = combine1 (ps, pm);
1349 d = *pd;
1350
1351 ps++;
1352 if (pm)
1353 pm++;
1354 *pd++ = _mm_cvtsi128_si32 (
1355 _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
1356 w--;
1357 }
1358
 /* SIMD body: 4 pixels per iteration. */
1359 while (w >= 4)
1360 {
1361 __m128i s;
1362
1363 s = combine4 ((__m128i*)ps, (__m128i*)pm);
1364
1365 save_128_aligned (
1366 (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd)));
1367
1368 pd += 4;
1369 ps += 4;
1370 if (pm)
1371 pm += 4;
1372 w -= 4;
1373 }
1374
 /* Tail: remaining 0..3 pixels. */
1375 while (w--)
1376 {
1377 s = combine1 (ps, pm);
1378 d = *pd;
1379
1380 ps++;
1381 *pd++ = _mm_cvtsi128_si32 (
1382 _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
1383 if (pm)
1384 pm++;
1385 }
1386}
1387
/*
 * Single-pixel SATURATE (unified alpha): if the source alpha exceeds
 * the free room in the destination (~dst alpha), scale the source down
 * by da/sa first, then add with 16-bit saturation.  DIV_UN8 appears
 * pre-expanded here (rounded byte division by sa).
 */
1388static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1389core_combine_saturate_u_pixel_sse2 (uint32_t src,
1390 uint32_t dst)
1391{
1392 __m128i ms = unpack_32_1x128 (src);
1393 __m128i md = unpack_32_1x128 (dst);
1394 uint32_t sa = src >> 24;
1395 uint32_t da = ~dst >> 24;
1396
1397 if (sa > da)
1398 {
 /* Scale src by DIV_UN8 (da, sa), broadcast via the alpha channel. */
1399 ms = pix_multiply_1x128 (
1400 ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa)(((uint16_t) (da) * 0xff + ((sa) / 2)) / (sa)) << 24)));
1401 }
1402
1403 return pack_1x128_32 (_mm_adds_epu16 (md, ms));
1404}
1405
/*
 * SATURATE operator, unified alpha.  The SIMD body first tests whether
 * any of the 4 source alphas exceeds its destination's free room
 * (~alpha(dst)); only then does it fall back to the scalar per-pixel
 * path, otherwise a plain saturated add suffices.
 * pm (mask) may be NULL.
 */
1406static void
1407sse2_combine_saturate_u (pixman_implementation_t *imp,
1408 pixman_op_t op,
1409 uint32_t * pd,
1410 const uint32_t * ps,
1411 const uint32_t * pm,
1412 int w)
1413{
1414 uint32_t s, d;
1415
1416 uint32_t pack_cmp;
1417 __m128i xmm_src, xmm_dst;
1418
 /* Head: align pd to 16 bytes. */
1419 while (w && (uintptr_t)pd & 15)
1420 {
1421 s = combine1 (ps, pm);
1422 d = *pd;
1423
1424 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1425 w--;
1426 ps++;
1427 if (pm)
1428 pm++;
1429 }
1430
1431 while (w >= 4)
1432 {
1433 xmm_dst = load_128_aligned ((__m128i*)pd);
1434 xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
1435
 /* Per-lane compare of alpha(src) against ~alpha(dst). */
1436 pack_cmp = _mm_movemask_epi8 (
1437 _mm_cmpgt_epi32 (
1438 _mm_srli_epi32 (xmm_src, 24),
1439 _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
1440
1441 /* if some alpha src is greater than respective ~alpha dst */
1442 if (pack_cmp)
1443 {
 /* Slow path: handle the 4 pixels individually. */
1444 s = combine1 (ps++, pm);
1445 d = *pd;
1446 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1447 if (pm)
1448 pm++;
1449
1450 s = combine1 (ps++, pm);
1451 d = *pd;
1452 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1453 if (pm)
1454 pm++;
1455
1456 s = combine1 (ps++, pm);
1457 d = *pd;
1458 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1459 if (pm)
1460 pm++;
1461
1462 s = combine1 (ps++, pm);
1463 d = *pd;
1464 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1465 if (pm)
1466 pm++;
1467 }
1468 else
1469 {
 /* Fast path: nothing saturates, plain per-byte saturated add. */
1470 save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
1471
1472 pd += 4;
1473 ps += 4;
1474 if (pm)
1475 pm += 4;
1476 }
1477
1478 w -= 4;
1479 }
1480
 /* Tail: remaining 0..3 pixels. */
1481 while (w--)
1482 {
1483 s = combine1 (ps, pm);
1484 d = *pd;
1485
1486 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1487 ps++;
1488 if (pm)
1489 pm++;
1490 }
1491}
1492
/*
 * SRC operator, component alpha: dst = src * mask (per channel).
 * Unlike the unified-alpha combiners, pm is required here (dereferenced
 * unconditionally).  Align head / 4-pixel SSE2 body / scalar tail.
 */
1493static void
1494sse2_combine_src_ca (pixman_implementation_t *imp,
1495 pixman_op_t op,
1496 uint32_t * pd,
1497 const uint32_t * ps,
1498 const uint32_t * pm,
1499 int w)
1500{
1501 uint32_t s, m;
1502
1503 __m128i xmm_src_lo, xmm_src_hi;
1504 __m128i xmm_mask_lo, xmm_mask_hi;
1505 __m128i xmm_dst_lo, xmm_dst_hi;
1506
 /* Head: align pd to 16 bytes. */
1507 while (w && (uintptr_t)pd & 15)
1508 {
1509 s = *ps++;
1510 m = *pm++;
1511 *pd++ = pack_1x128_32 (
1512 pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
1513 w--;
1514 }
1515
 /* SIMD body: 4 pixels per iteration. */
1516 while (w >= 4)
1517 {
1518 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1519 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1520
1521 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1522 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1523
1524 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1525 &xmm_mask_lo, &xmm_mask_hi,
1526 &xmm_dst_lo, &xmm_dst_hi);
1527
1528 save_128_aligned (
1529 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1530
1531 ps += 4;
1532 pd += 4;
1533 pm += 4;
1534 w -= 4;
1535 }
1536
 /* Tail: remaining 0..3 pixels. */
1537 while (w)
1538 {
1539 s = *ps++;
1540 m = *pm++;
1541 *pd++ = pack_1x128_32 (
1542 pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
1543 w--;
1544 }
1545}
1546
/*
 * Single-pixel OVER, component alpha: IN (src, mask) OVER dst,
 * delegated to the in_over_1x128 helper.
 */
1547static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1548core_combine_over_ca_pixel_sse2 (uint32_t src,
1549 uint32_t mask,
1550 uint32_t dst)
1551{
1552 __m128i s = unpack_32_1x128 (src);
1553 __m128i expAlpha = expand_alpha_1x128 (s);
1554 __m128i unpk_mask = unpack_32_1x128 (mask);
1555 __m128i unpk_dst = unpack_32_1x128 (dst);
1556
1557 return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst));
1558}
1559
1560static void
1561sse2_combine_over_ca (pixman_implementation_t *imp,
1562 pixman_op_t op,
1563 uint32_t * pd,
1564 const uint32_t * ps,
1565 const uint32_t * pm,
1566 int w)
1567{
1568 uint32_t s, m, d;
1569
1570 __m128i xmm_alpha_lo, xmm_alpha_hi;
1571 __m128i xmm_src_lo, xmm_src_hi;
1572 __m128i xmm_dst_lo, xmm_dst_hi;
1573 __m128i xmm_mask_lo, xmm_mask_hi;
1574
1575 while (w && (uintptr_t)pd & 15)
1576 {
1577 s = *ps++;
1578 m = *pm++;
1579 d = *pd;
1580
1581 *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
1582 w--;
1583 }
1584
1585 while (w >= 4)
1586 {
1587 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1588 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1589 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1590
1591 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1592 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1593 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1594
1595 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1596 &xmm_alpha_lo, &xmm_alpha_hi);
1597
1598 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
1599 &xmm_alpha_lo, &xmm_alpha_hi,
1600 &xmm_mask_lo, &xmm_mask_hi,
1601 &xmm_dst_lo, &xmm_dst_hi);
1602
1603 save_128_aligned (
1604 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1605
1606 ps += 4;
1607 pd += 4;
1608 pm += 4;
1609 w -= 4;
1610 }
1611
1612 while (w)
1613 {
1614 s = *ps++;
1615 m = *pm++;
1616 d = *pd;
1617
1618 *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
1619 w--;
1620 }
1621}
1622
/*
 * Single-pixel OVER_REVERSE, component alpha:
 * result = dst OVER (src * mask).
 */
1623static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1624core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
1625 uint32_t mask,
1626 uint32_t dst)
1627{
1628 __m128i d = unpack_32_1x128 (dst);
1629
1630 return pack_1x128_32 (
1631 over_1x128 (d, expand_alpha_1x128 (d),
1632 pix_multiply_1x128 (unpack_32_1x128 (src),
1633 unpack_32_1x128 (mask))));
1634}
1635
/*
 * OVER_REVERSE operator, component alpha:
 * dst = dst OVER (src * mask).
 * pm is required.  Align head / 4-pixel SSE2 body / scalar tail.
 */
1636static void
1637sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
1638 pixman_op_t op,
1639 uint32_t * pd,
1640 const uint32_t * ps,
1641 const uint32_t * pm,
1642 int w)
1643{
1644 uint32_t s, m, d;
1645
1646 __m128i xmm_alpha_lo, xmm_alpha_hi;
1647 __m128i xmm_src_lo, xmm_src_hi;
1648 __m128i xmm_dst_lo, xmm_dst_hi;
1649 __m128i xmm_mask_lo, xmm_mask_hi;
1650
 /* Head: align pd to 16 bytes. */
1651 while (w && (uintptr_t)pd & 15)
1652 {
1653 s = *ps++;
1654 m = *pm++;
1655 d = *pd;
1656
1657 *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
1658 w--;
1659 }
1660
 /* SIMD body: masked src lands in xmm_mask_*, then dst is composited
  over it; note the result is packed from xmm_mask_*. */
1661 while (w >= 4)
1662 {
1663 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1664 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1665 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1666
1667 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1668 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1669 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1670
1671 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1672 &xmm_alpha_lo, &xmm_alpha_hi);
1673 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1674 &xmm_mask_lo, &xmm_mask_hi,
1675 &xmm_mask_lo, &xmm_mask_hi);
1676
1677 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1678 &xmm_alpha_lo, &xmm_alpha_hi,
1679 &xmm_mask_lo, &xmm_mask_hi);
1680
1681 save_128_aligned (
1682 (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
1683
1684 ps += 4;
1685 pd += 4;
1686 pm += 4;
1687 w -= 4;
1688 }
1689
 /* Tail: remaining 0..3 pixels. */
1690 while (w)
1691 {
1692 s = *ps++;
1693 m = *pm++;
1694 d = *pd;
1695
1696 *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
1697 w--;
1698 }
1699}
1700
/*
 * IN operator, component alpha:
 * dst = (src * mask) * alpha(dst).
 * pm is required.  Align head / 4-pixel SSE2 body / scalar tail.
 */
1701static void
1702sse2_combine_in_ca (pixman_implementation_t *imp,
1703 pixman_op_t op,
1704 uint32_t * pd,
1705 const uint32_t * ps,
1706 const uint32_t * pm,
1707 int w)
1708{
1709 uint32_t s, m, d;
1710
1711 __m128i xmm_alpha_lo, xmm_alpha_hi;
1712 __m128i xmm_src_lo, xmm_src_hi;
1713 __m128i xmm_dst_lo, xmm_dst_hi;
1714 __m128i xmm_mask_lo, xmm_mask_hi;
1715
 /* Head: align pd to 16 bytes. */
1716 while (w && (uintptr_t)pd & 15)
1717 {
1718 s = *ps++;
1719 m = *pm++;
1720 d = *pd;
1721
1722 *pd++ = pack_1x128_32 (
1723 pix_multiply_1x128 (
1724 pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)),
1725 expand_alpha_1x128 (unpack_32_1x128 (d))));
1726
1727 w--;
1728 }
1729
 /* SIMD body: 4 pixels per iteration. */
1730 while (w >= 4)
1731 {
1732 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1733 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1734 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1735
1736 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1737 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1738 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1739
1740 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1741 &xmm_alpha_lo, &xmm_alpha_hi);
1742
1743 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1744 &xmm_mask_lo, &xmm_mask_hi,
1745 &xmm_dst_lo, &xmm_dst_hi);
1746
1747 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1748 &xmm_alpha_lo, &xmm_alpha_hi,
1749 &xmm_dst_lo, &xmm_dst_hi);
1750
1751 save_128_aligned (
1752 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1753
1754 ps += 4;
1755 pd += 4;
1756 pm += 4;
1757 w -= 4;
1758 }
1759
 /* Tail: remaining 0..3 pixels. */
1760 while (w)
1761 {
1762 s = *ps++;
1763 m = *pm++;
1764 d = *pd;
1765
1766 *pd++ = pack_1x128_32 (
1767 pix_multiply_1x128 (
1768 pix_multiply_1x128 (
1769 unpack_32_1x128 (s), unpack_32_1x128 (m)),
1770 expand_alpha_1x128 (unpack_32_1x128 (d))));
1771
1772 w--;
1773 }
1774}
1775
/*
 * IN_REVERSE operator, component alpha:
 * dst = dst * (mask * alpha(src)).
 * pm is required.  Align head / 4-pixel SSE2 body / scalar tail.
 */
1776static void
1777sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
1778 pixman_op_t op,
1779 uint32_t * pd,
1780 const uint32_t * ps,
1781 const uint32_t * pm,
1782 int w)
1783{
1784 uint32_t s, m, d;
1785
1786 __m128i xmm_alpha_lo, xmm_alpha_hi;
1787 __m128i xmm_src_lo, xmm_src_hi;
1788 __m128i xmm_dst_lo, xmm_dst_hi;
1789 __m128i xmm_mask_lo, xmm_mask_hi;
1790
 /* Head: align pd to 16 bytes. */
1791 while (w && (uintptr_t)pd & 15)
1792 {
1793 s = *ps++;
1794 m = *pm++;
1795 d = *pd;
1796
1797 *pd++ = pack_1x128_32 (
1798 pix_multiply_1x128 (
1799 unpack_32_1x128 (d),
1800 pix_multiply_1x128 (unpack_32_1x128 (m),
1801 expand_alpha_1x128 (unpack_32_1x128 (s)))));
1802 w--;
1803 }
1804
 /* SIMD body: mask * alpha(src) is computed into xmm_alpha_*. */
1805 while (w >= 4)
1806 {
1807 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1808 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1809 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1810
1811 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1812 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1813 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1814
1815 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1816 &xmm_alpha_lo, &xmm_alpha_hi);
1817 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
1818 &xmm_alpha_lo, &xmm_alpha_hi,
1819 &xmm_alpha_lo, &xmm_alpha_hi);
1820
1821 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1822 &xmm_alpha_lo, &xmm_alpha_hi,
1823 &xmm_dst_lo, &xmm_dst_hi);
1824
1825 save_128_aligned (
1826 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1827
1828 ps += 4;
1829 pd += 4;
1830 pm += 4;
1831 w -= 4;
1832 }
1833
 /* Tail: remaining 0..3 pixels. */
1834 while (w)
1835 {
1836 s = *ps++;
1837 m = *pm++;
1838 d = *pd;
1839
1840 *pd++ = pack_1x128_32 (
1841 pix_multiply_1x128 (
1842 unpack_32_1x128 (d),
1843 pix_multiply_1x128 (unpack_32_1x128 (m),
1844 expand_alpha_1x128 (unpack_32_1x128 (s)))));
1845 w--;
1846 }
1847}
1848
/*
 * OUT operator, component alpha:
 * dst = (src * mask) * (1 - alpha(dst)).
 * pm is required.  Align head / 4-pixel SSE2 body / scalar tail.
 */
1849static void
1850sse2_combine_out_ca (pixman_implementation_t *imp,
1851 pixman_op_t op,
1852 uint32_t * pd,
1853 const uint32_t * ps,
1854 const uint32_t * pm,
1855 int w)
1856{
1857 uint32_t s, m, d;
1858
1859 __m128i xmm_alpha_lo, xmm_alpha_hi;
1860 __m128i xmm_src_lo, xmm_src_hi;
1861 __m128i xmm_dst_lo, xmm_dst_hi;
1862 __m128i xmm_mask_lo, xmm_mask_hi;
1863
 /* Head: align pd to 16 bytes. */
1864 while (w && (uintptr_t)pd & 15)
1865 {
1866 s = *ps++;
1867 m = *pm++;
1868 d = *pd;
1869
1870 *pd++ = pack_1x128_32 (
1871 pix_multiply_1x128 (
1872 pix_multiply_1x128 (
1873 unpack_32_1x128 (s), unpack_32_1x128 (m)),
1874 negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
1875 w--;
1876 }
1877
 /* SIMD body: 4 pixels per iteration. */
1878 while (w >= 4)
1879 {
1880 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1881 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1882 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1883
1884 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1885 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1886 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1887
1888 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1889 &xmm_alpha_lo, &xmm_alpha_hi);
1890 negate_2x128 (xmm_alpha_lo, xmm_alpha_hi,
1891 &xmm_alpha_lo, &xmm_alpha_hi);
1892
1893 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1894 &xmm_mask_lo, &xmm_mask_hi,
1895 &xmm_dst_lo, &xmm_dst_hi);
1896 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1897 &xmm_alpha_lo, &xmm_alpha_hi,
1898 &xmm_dst_lo, &xmm_dst_hi);
1899
1900 save_128_aligned (
1901 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1902
1903 ps += 4;
1904 pd += 4;
1905 pm += 4;
1906 w -= 4;
1907 }
1908
 /* Tail: remaining 0..3 pixels. */
1909 while (w)
1910 {
1911 s = *ps++;
1912 m = *pm++;
1913 d = *pd;
1914
1915 *pd++ = pack_1x128_32 (
1916 pix_multiply_1x128 (
1917 pix_multiply_1x128 (
1918 unpack_32_1x128 (s), unpack_32_1x128 (m)),
1919 negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
1920
1921 w--;
1922 }
1923}
1924
/*
 * OUT_REVERSE operator, component alpha:
 * dst = dst * (1 - mask * alpha(src)).
 * pm is required.  Align head / 4-pixel SSE2 body / scalar tail.
 */
1925static void
1926sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
1927 pixman_op_t op,
1928 uint32_t * pd,
1929 const uint32_t * ps,
1930 const uint32_t * pm,
1931 int w)
1932{
1933 uint32_t s, m, d;
1934
1935 __m128i xmm_alpha_lo, xmm_alpha_hi;
1936 __m128i xmm_src_lo, xmm_src_hi;
1937 __m128i xmm_dst_lo, xmm_dst_hi;
1938 __m128i xmm_mask_lo, xmm_mask_hi;
1939
 /* Head: align pd to 16 bytes. */
1940 while (w && (uintptr_t)pd & 15)
1941 {
1942 s = *ps++;
1943 m = *pm++;
1944 d = *pd;
1945
1946 *pd++ = pack_1x128_32 (
1947 pix_multiply_1x128 (
1948 unpack_32_1x128 (d),
1949 negate_1x128 (pix_multiply_1x128 (
1950 unpack_32_1x128 (m),
1951 expand_alpha_1x128 (unpack_32_1x128 (s))))));
1952 w--;
1953 }
1954
 /* SIMD body: mask *= alpha(src), negate, multiply into dst. */
1955 while (w >= 4)
1956 {
1957 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1958 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1959 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1960
1961 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1962 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1963 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1964
1965 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1966 &xmm_alpha_lo, &xmm_alpha_hi);
1967
1968 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
1969 &xmm_alpha_lo, &xmm_alpha_hi,
1970 &xmm_mask_lo, &xmm_mask_hi);
1971
1972 negate_2x128 (xmm_mask_lo, xmm_mask_hi,
1973 &xmm_mask_lo, &xmm_mask_hi);
1974
1975 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1976 &xmm_mask_lo, &xmm_mask_hi,
1977 &xmm_dst_lo, &xmm_dst_hi);
1978
1979 save_128_aligned (
1980 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1981
1982 ps += 4;
1983 pd += 4;
1984 pm += 4;
1985 w -= 4;
1986 }
1987
 /* Tail: remaining 0..3 pixels. */
1988 while (w)
1989 {
1990 s = *ps++;
1991 m = *pm++;
1992 d = *pd;
1993
1994 *pd++ = pack_1x128_32 (
1995 pix_multiply_1x128 (
1996 unpack_32_1x128 (d),
1997 negate_1x128 (pix_multiply_1x128 (
1998 unpack_32_1x128 (m),
1999 expand_alpha_1x128 (unpack_32_1x128 (s))))));
2000 w--;
2001 }
2002}
2003
/*
 * Single-pixel ATOP, component alpha:
 * result = (src * mask) * alpha(dst) + dst * (1 - mask * alpha(src)).
 */
2004static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
2005core_combine_atop_ca_pixel_sse2 (uint32_t src,
2006 uint32_t mask,
2007 uint32_t dst)
2008{
2009 __m128i m = unpack_32_1x128 (mask);
2010 __m128i s = unpack_32_1x128 (src);
2011 __m128i d = unpack_32_1x128 (dst);
2012 __m128i sa = expand_alpha_1x128 (s);
2013 __m128i da = expand_alpha_1x128 (d);
2014
2015 s = pix_multiply_1x128 (s, m);
2016 m = negate_1x128 (pix_multiply_1x128 (m, sa));
2017
2018 return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
2019}
2020
/*
 * ATOP operator, component alpha:
 * dst = (src * mask) * alpha(dst) + dst * (1 - mask * alpha(src)).
 * pm is required.  Align head / 4-pixel SSE2 body / scalar tail.
 */
2021static void
2022sse2_combine_atop_ca (pixman_implementation_t *imp,
2023 pixman_op_t op,
2024 uint32_t * pd,
2025 const uint32_t * ps,
2026 const uint32_t * pm,
2027 int w)
2028{
2029 uint32_t s, m, d;
2030
2031 __m128i xmm_src_lo, xmm_src_hi;
2032 __m128i xmm_dst_lo, xmm_dst_hi;
2033 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
2034 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
2035 __m128i xmm_mask_lo, xmm_mask_hi;
2036
 /* Head: align pd to 16 bytes. */
2037 while (w && (uintptr_t)pd & 15)
2038 {
2039 s = *ps++;
2040 m = *pm++;
2041 d = *pd;
2042
2043 *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
2044 w--;
2045 }
2046
 /* SIMD body: src *= mask; mask = 1 - mask * alpha(src). */
2047 while (w >= 4)
2048 {
2049 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2050 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2051 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2052
2053 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2054 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2055 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2056
2057 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2058 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
2059 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
2060 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2061
2062 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2063 &xmm_mask_lo, &xmm_mask_hi,
2064 &xmm_src_lo, &xmm_src_hi);
2065 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
2066 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
2067 &xmm_mask_lo, &xmm_mask_hi);
2068
2069 negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2070
2071 pix_add_multiply_2x128 (
2072 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
2073 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2074 &xmm_dst_lo, &xmm_dst_hi);
2075
2076 save_128_aligned (
2077 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2078
2079 ps += 4;
2080 pd += 4;
2081 pm += 4;
2082 w -= 4;
2083 }
2084
 /* Tail: remaining 0..3 pixels. */
2085 while (w)
2086 {
2087 s = *ps++;
2088 m = *pm++;
2089 d = *pd;
2090
2091 *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
2092 w--;
2093 }
2094}
2095
/*
 * Single-pixel ATOP_REVERSE, component alpha:
 * result = (src * mask) * (1 - alpha(dst)) + dst * (mask * alpha(src)).
 */
2096static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
2097core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
2098 uint32_t mask,
2099 uint32_t dst)
2100{
2101 __m128i m = unpack_32_1x128 (mask);
2102 __m128i s = unpack_32_1x128 (src);
2103 __m128i d = unpack_32_1x128 (dst);
2104
2105 __m128i da = negate_1x128 (expand_alpha_1x128 (d));
2106 __m128i sa = expand_alpha_1x128 (s);
2107
2108 s = pix_multiply_1x128 (s, m);
2109 m = pix_multiply_1x128 (m, sa);
2110
2111 return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
2112}
2113
/*
 * ATOP_REVERSE operator, component alpha:
 * dst = (src * mask) * (1 - alpha(dst)) + dst * (mask * alpha(src)).
 * pm is required.  Align head / 4-pixel SSE2 body / scalar tail.
 */
2114static void
2115sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
2116 pixman_op_t op,
2117 uint32_t * pd,
2118 const uint32_t * ps,
2119 const uint32_t * pm,
2120 int w)
2121{
2122 uint32_t s, m, d;
2123
2124 __m128i xmm_src_lo, xmm_src_hi;
2125 __m128i xmm_dst_lo, xmm_dst_hi;
2126 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
2127 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
2128 __m128i xmm_mask_lo, xmm_mask_hi;
2129
 /* Head: align pd to 16 bytes. */
2130 while (w && (uintptr_t)pd & 15)
2131 {
2132 s = *ps++;
2133 m = *pm++;
2134 d = *pd;
2135
2136 *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
2137 w--;
2138 }
2139
 /* SIMD body: here the destination alpha is the negated factor. */
2140 while (w >= 4)
2141 {
2142 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2143 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2144 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2145
2146 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2147 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2148 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2149
2150 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2151 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
2152 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
2153 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2154
2155 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2156 &xmm_mask_lo, &xmm_mask_hi,
2157 &xmm_src_lo, &xmm_src_hi);
2158 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
2159 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
2160 &xmm_mask_lo, &xmm_mask_hi);
2161
2162 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
2163 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2164
2165 pix_add_multiply_2x128 (
2166 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
2167 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2168 &xmm_dst_lo, &xmm_dst_hi);
2169
2170 save_128_aligned (
2171 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2172
2173 ps += 4;
2174 pd += 4;
2175 pm += 4;
2176 w -= 4;
2177 }
2178
 /* Tail: remaining 0..3 pixels. */
2179 while (w)
2180 {
2181 s = *ps++;
2182 m = *pm++;
2183 d = *pd;
2184
2185 *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
2186 w--;
2187 }
2188}
2189
/*
 * Single-pixel XOR, component alpha:
 * result = dst * (1 - mask * alpha(src)) + (src * mask) * (1 - alpha(dst)).
 */
2190static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
2191core_combine_xor_ca_pixel_sse2 (uint32_t src,
2192 uint32_t mask,
2193 uint32_t dst)
2194{
2195 __m128i a = unpack_32_1x128 (mask);
2196 __m128i s = unpack_32_1x128 (src);
2197 __m128i d = unpack_32_1x128 (dst);
2198
2199 __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 (
2200 a, expand_alpha_1x128 (s)));
2201 __m128i dest = pix_multiply_1x128 (s, a);
2202 __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d));
2203
2204 return pack_1x128_32 (pix_add_multiply_1x128 (&d,
2205 &alpha_dst,
2206 &dest,
2207 &alpha_src));
2208}
2209
/*
 * XOR operator, component alpha:
 * dst = dst * (1 - mask * alpha(src)) + (src * mask) * (1 - alpha(dst)).
 * pm is required.  Align head / 4-pixel SSE2 body / scalar tail.
 */
2210static void
2211sse2_combine_xor_ca (pixman_implementation_t *imp,
2212 pixman_op_t op,
2213 uint32_t * pd,
2214 const uint32_t * ps,
2215 const uint32_t * pm,
2216 int w)
2217{
2218 uint32_t s, m, d;
2219
2220 __m128i xmm_src_lo, xmm_src_hi;
2221 __m128i xmm_dst_lo, xmm_dst_hi;
2222 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
2223 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
2224 __m128i xmm_mask_lo, xmm_mask_hi;
2225
 /* Head: align pd to 16 bytes. */
2226 while (w && (uintptr_t)pd & 15)
2227 {
2228 s = *ps++;
2229 m = *pm++;
2230 d = *pd;
2231
2232 *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
2233 w--;
2234 }
2235
 /* SIMD body: both factors are negated, mirroring the scalar helper. */
2236 while (w >= 4)
2237 {
2238 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2239 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2240 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2241
2242 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2243 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2244 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2245
2246 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2247 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
2248 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
2249 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2250
2251 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2252 &xmm_mask_lo, &xmm_mask_hi,
2253 &xmm_src_lo, &xmm_src_hi);
2254 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
2255 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
2256 &xmm_mask_lo, &xmm_mask_hi);
2257
2258 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
2259 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2260 negate_2x128 (xmm_mask_lo, xmm_mask_hi,
2261 &xmm_mask_lo, &xmm_mask_hi);
2262
2263 pix_add_multiply_2x128 (
2264 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
2265 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2266 &xmm_dst_lo, &xmm_dst_hi);
2267
2268 save_128_aligned (
2269 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2270
2271 ps += 4;
2272 pd += 4;
2273 pm += 4;
2274 w -= 4;
2275 }
2276
 /* Tail: remaining 0..3 pixels. */
2277 while (w)
2278 {
2279 s = *ps++;
2280 m = *pm++;
2281 d = *pd;
2282
2283 *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
2284 w--;
2285 }
2286}
2287
2288static void
2289sse2_combine_add_ca (pixman_implementation_t *imp,
2290 pixman_op_t op,
2291 uint32_t * pd,
2292 const uint32_t * ps,
2293 const uint32_t * pm,
2294 int w)
2295{
2296 uint32_t s, m, d;
2297
2298 __m128i xmm_src_lo, xmm_src_hi;
2299 __m128i xmm_dst_lo, xmm_dst_hi;
2300 __m128i xmm_mask_lo, xmm_mask_hi;
2301
2302 while (w && (uintptr_t)pd & 15)
2303 {
2304 s = *ps++;
2305 m = *pm++;
2306 d = *pd;
2307
2308 *pd++ = pack_1x128_32 (
2309 _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
2310 unpack_32_1x128 (m)),
2311 unpack_32_1x128 (d)));
2312 w--;
2313 }
2314
2315 while (w >= 4)
2316 {
2317 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2318 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2319 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2320
2321 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2322 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2323 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2324
2325 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2326 &xmm_mask_lo, &xmm_mask_hi,
2327 &xmm_src_lo, &xmm_src_hi);
2328
2329 save_128_aligned (
2330 (__m128i*)pd, pack_2x128_128 (
2331 _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
2332 _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
2333
2334 ps += 4;
2335 pd += 4;
2336 pm += 4;
2337 w -= 4;
2338 }
2339
2340 while (w)
2341 {
2342 s = *ps++;
2343 m = *pm++;
2344 d = *pd;
2345
2346 *pd++ = pack_1x128_32 (
2347 _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
2348 unpack_32_1x128 (m)),
2349 unpack_32_1x128 (d)));
2350 w--;
2351 }
2352}
2353
/* Broadcast a 16-bit value into all eight lanes of an SSE register. */
static __inline__ __attribute__ ((__always_inline__)) __m128i
create_mask_16_128 (uint16_t mask)
{
    return _mm_set1_epi16 (mask);
}
2359
/* Build a 128-bit value whose 32-bit lanes are, high to low:
 * mask0, mask1, mask0, mask1.
 *
 * Work around a code generation bug in Sun Studio 12: on that
 * compiler this must be a macro, not an inline function.
 */
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
# define create_mask_2x32_128(mask0, mask1) \
    (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
#else
static __inline__ __attribute__ ((__always_inline__)) __m128i
create_mask_2x32_128 (uint32_t mask0,
                      uint32_t mask1)
{
    return _mm_set_epi32 (mask0, mask1, mask0, mask1);
}
#endif
2372
2373static void
2374sse2_composite_over_n_8888 (pixman_implementation_t *imp,
2375 pixman_composite_info_t *info)
2376{
2377 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
2378 uint32_t src;
2379 uint32_t *dst_line, *dst, d;
2380 int32_t w;
2381 int dst_stride;
2382 __m128i xmm_src, xmm_alpha;
2383 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2384
2385 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2386
2387 if (src == 0)
2388 return;
2389
2390 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
2391 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
2392
2393 xmm_src = expand_pixel_32_1x128 (src);
2394 xmm_alpha = expand_alpha_1x128 (xmm_src);
2395
2396 while (height--)
2397 {
2398 dst = dst_line;
2399
2400 dst_line += dst_stride;
2401 w = width;
2402
2403 while (w && (uintptr_t)dst & 15)
2404 {
2405 d = *dst;
2406 *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
2407 xmm_alpha,
2408 unpack_32_1x128 (d)));
2409 w--;
2410 }
2411
2412 while (w >= 4)
2413 {
2414 xmm_dst = load_128_aligned ((__m128i*)dst);
2415
2416 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
2417
2418 over_2x128 (&xmm_src, &xmm_src,
2419 &xmm_alpha, &xmm_alpha,
2420 &xmm_dst_lo, &xmm_dst_hi);
2421
2422 /* rebuid the 4 pixel data and save*/
2423 save_128_aligned (
2424 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2425
2426 w -= 4;
2427 dst += 4;
2428 }
2429
2430 while (w)
2431 {
2432 d = *dst;
2433 *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
2434 xmm_alpha,
2435 unpack_32_1x128 (d)));
2436 w--;
2437 }
2438
2439 }
2440}
2441
2442static void
2443sse2_composite_over_n_0565 (pixman_implementation_t *imp,
2444 pixman_composite_info_t *info)
2445{
2446 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
2447 uint32_t src;
2448 uint16_t *dst_line, *dst, d;
2449 int32_t w;
2450 int dst_stride;
2451 __m128i xmm_src, xmm_alpha;
2452 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
2453
2454 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2455
2456 if (src == 0)
2457 return;
2458
2459 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
2460 dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
2461
2462 xmm_src = expand_pixel_32_1x128 (src);
2463 xmm_alpha = expand_alpha_1x128 (xmm_src);
2464
2465 while (height--)
2466 {
2467 dst = dst_line;
2468
2469 dst_line += dst_stride;
2470 w = width;
2471
2472 while (w && (uintptr_t)dst & 15)
2473 {
2474 d = *dst;
2475
2476 *dst++ = pack_565_32_16 (
2477 pack_1x128_32 (over_1x128 (xmm_src,
2478 xmm_alpha,
2479 expand565_16_1x128 (d))));
2480 w--;
2481 }
2482
2483 while (w >= 8)
2484 {
2485 xmm_dst = load_128_aligned ((__m128i*)dst);
2486
2487 unpack_565_128_4x128 (xmm_dst,
2488 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
2489
2490 over_2x128 (&xmm_src, &xmm_src,
2491 &xmm_alpha, &xmm_alpha,
2492 &xmm_dst0, &xmm_dst1);
2493 over_2x128 (&xmm_src, &xmm_src,
2494 &xmm_alpha, &xmm_alpha,
2495 &xmm_dst2, &xmm_dst3);
2496
2497 xmm_dst = pack_565_4x128_128 (
2498 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
2499
2500 save_128_aligned ((__m128i*)dst, xmm_dst);
2501
2502 dst += 8;
2503 w -= 8;
2504 }
2505
2506 while (w--)
2507 {
2508 d = *dst;
2509 *dst++ = pack_565_32_16 (
2510 pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha,
2511 expand565_16_1x128 (d))));
2512 }
2513 }
2514
2515}
2516
2517static void
2518sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
2519 pixman_composite_info_t *info)
2520{
2521 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
2522 uint32_t src;
2523 uint32_t *dst_line, d;
2524 uint32_t *mask_line, m;
2525 uint32_t pack_cmp;
2526 int dst_stride, mask_stride;
2527
2528 __m128i xmm_src;
2529 __m128i xmm_dst;
2530 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
2531
2532 __m128i mmx_src, mmx_mask, mmx_dest;
2533
2534 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2535
2536 if (src == 0)
2537 return;
2538
2539 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
2540 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
2541 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
2542 mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
2543
2544 xmm_src = _mm_unpacklo_epi8 (
2545 create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
2546 mmx_src = xmm_src;
2547
2548 while (height--)
2549 {
2550 int w = width;
2551 const uint32_t *pm = (uint32_t *)mask_line;
2552 uint32_t *pd = (uint32_t *)dst_line;
2553
2554 dst_line += dst_stride;
2555 mask_line += mask_stride;
2556
2557 while (w && (uintptr_t)pd & 15)
2558 {
2559 m = *pm++;
2560
2561 if (m)
2562 {
2563 d = *pd;
2564
2565 mmx_mask = unpack_32_1x128 (m);
2566 mmx_dest = unpack_32_1x128 (d);
2567
2568 *pd = pack_1x128_32 (
2569 _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
2570 mmx_dest));
2571 }
2572
2573 pd++;
2574 w--;
2575 }
2576
2577 while (w >= 4)
2578 {
2579 xmm_mask = load_128_unaligned ((__m128i*)pm);
2580
2581 pack_cmp =
2582 _mm_movemask_epi8 (
2583 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
2584
2585 /* if all bits in mask are zero, pack_cmp are equal to 0xffff */
2586 if (pack_cmp != 0xffff)
2587 {
2588 xmm_dst = load_128_aligned ((__m128i*)pd);
2589
2590 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
2591
2592 pix_multiply_2x128 (&xmm_src, &xmm_src,
2593 &xmm_mask_lo, &xmm_mask_hi,
2594 &xmm_mask_lo, &xmm_mask_hi);
2595 xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi);
2596
2597 save_128_aligned (
2598 (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst));
2599 }
2600
2601 pd += 4;
2602 pm += 4;
2603 w -= 4;
2604 }
2605
2606 while (w)
2607 {
2608 m = *pm++;
2609
2610 if (m)
2611 {
2612 d = *pd;
2613
2614 mmx_mask = unpack_32_1x128 (m);
2615 mmx_dest = unpack_32_1x128 (d);
2616
2617 *pd = pack_1x128_32 (
2618 _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
2619 mmx_dest));
2620 }
2621
2622 pd++;
2623 w--;
2624 }
2625 }
2626
2627}
2628
2629static void
2630sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
2631 pixman_composite_info_t *info)
2632{
2633 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
2634 uint32_t src;
2635 uint32_t *dst_line, d;
2636 uint32_t *mask_line, m;
2637 uint32_t pack_cmp;
2638 int dst_stride, mask_stride;
2639
2640 __m128i xmm_src, xmm_alpha;
2641 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2642 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
2643
2644 __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
2645
2646 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2647
2648 if (src == 0)
2649 return;
2650
2651 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
2652 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
2653 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
2654 mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
2655
2656 xmm_src = _mm_unpacklo_epi8 (
2657 create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
2658 xmm_alpha = expand_alpha_1x128 (xmm_src);
2659 mmx_src = xmm_src;
2660 mmx_alpha = xmm_alpha;
2661
2662 while (height--)
2663 {
2664 int w = width;
2665 const uint32_t *pm = (uint32_t *)mask_line;
2666 uint32_t *pd = (uint32_t *)dst_line;
2667
2668 dst_line += dst_stride;
2669 mask_line += mask_stride;
2670
2671 while (w && (uintptr_t)pd & 15)
2672 {
2673 m = *pm++;
2674
2675 if (m)
2676 {
2677 d = *pd;
2678 mmx_mask = unpack_32_1x128 (m);
2679 mmx_dest = unpack_32_1x128 (d);
2680
2681 *pd = pack_1x128_32 (in_over_1x128 (&mmx_src,
2682 &mmx_alpha,
2683 &mmx_mask,
2684 &mmx_dest));
2685 }
2686
2687 pd++;
2688 w--;
2689 }
2690
2691 while (w >= 4)
2692 {
2693 xmm_mask = load_128_unaligned ((__m128i*)pm);
2694
2695 pack_cmp =
2696 _mm_movemask_epi8 (
2697 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
2698
2699 /* if all bits in mask are zero, pack_cmp are equal to 0xffff */
2700 if (pack_cmp != 0xffff)
2701 {
2702 xmm_dst = load_128_aligned ((__m128i*)pd);
2703
2704 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
2705 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
2706
2707 in_over_2x128 (&xmm_src, &xmm_src,
2708 &xmm_alpha, &xmm_alpha,
2709 &xmm_mask_lo, &xmm_mask_hi,
2710 &xmm_dst_lo, &xmm_dst_hi);
2711
2712 save_128_aligned (
2713 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2714 }
2715
2716 pd += 4;
2717 pm += 4;
2718 w -= 4;
2719 }
2720
2721 while (w)
2722 {
2723 m = *pm++;
2724
2725 if (m)
2726 {
2727 d = *pd;
2728 mmx_mask = unpack_32_1x128 (m);
2729 mmx_dest = unpack_32_1x128 (d);
2730
2731 *pd = pack_1x128_32 (
2732 in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
2733 }
2734
2735 pd++;
2736 w--;
2737 }
2738 }
2739
2740}
2741
2742static void
2743sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
2744 pixman_composite_info_t *info)
2745{
2746 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
2747 uint32_t *dst_line, *dst;
2748 uint32_t *src_line, *src;
2749 uint32_t mask;
2750 int32_t w;
2751 int dst_stride, src_stride;
2752
2753 __m128i xmm_mask;
2754 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
2755 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2756 __m128i xmm_alpha_lo, xmm_alpha_hi;
2757
2758 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
2759 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
2760 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
2761 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
2762
2763 mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
2764
2765 xmm_mask = create_mask_16_128 (mask >> 24);
2766
2767 while (height--)
2768 {
2769 dst = dst_line;
2770 dst_line += dst_stride;
2771 src = src_line;
2772 src_line += src_stride;
2773 w = width;
2774
2775 while (w && (uintptr_t)dst & 15)
2776 {
2777 uint32_t s = *src++;
2778
2779 if (s)
2780 {
2781 uint32_t d = *dst;
2782
2783 __m128i ms = unpack_32_1x128 (s);
2784 __m128i alpha = expand_alpha_1x128 (ms);
2785 __m128i dest = xmm_mask;
2786 __m128i alpha_dst = unpack_32_1x128 (d);
2787
2788 *dst = pack_1x128_32 (
2789 in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
2790 }
2791 dst++;
2792 w--;
2793 }
2794
2795 while (w >= 4)
2796 {
2797 xmm_src = load_128_unaligned ((__m128i*)src);
2798
2799 if (!is_zero (xmm_src))
2800 {
2801 xmm_dst = load_128_aligned ((__m128i*)dst);
2802
2803 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
2804 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
2805 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2806 &xmm_alpha_lo, &xmm_alpha_hi);
2807
2808 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
2809 &xmm_alpha_lo, &xmm_alpha_hi,
2810 &xmm_mask, &xmm_mask,
2811 &xmm_dst_lo, &xmm_dst_hi);
2812
2813 save_128_aligned (
2814 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2815 }
2816
2817 dst += 4;
2818 src += 4;
2819 w -= 4;
2820 }
2821
2822 while (w)
2823 {
2824 uint32_t s = *src++;
2825
2826 if (s)
2827 {
2828 uint32_t d = *dst;
2829
2830 __m128i ms = unpack_32_1x128 (s);
2831 __m128i alpha = expand_alpha_1x128 (ms);
2832 __m128i mask = xmm_mask;
2833 __m128i dest = unpack_32_1x128 (d);
2834
2835 *dst = pack_1x128_32 (
2836 in_over_1x128 (&ms, &alpha, &mask, &dest));
2837 }
2838
2839 dst++;
2840 w--;
2841 }
2842 }
2843
2844}
2845
2846static void
2847sse2_composite_src_x888_0565 (pixman_implementation_t *imp,
2848 pixman_composite_info_t *info)
2849{
2850 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
2851 uint16_t *dst_line, *dst;
2852 uint32_t *src_line, *src, s;
2853 int dst_stride, src_stride;
2854 int32_t w;
2855
2856 PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
2857 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
2858
2859 while (height--)
2860 {
2861 dst = dst_line;
2862 dst_line += dst_stride;
2863 src = src_line;
2864 src_line += src_stride;
2865 w = width;
2866
2867 while (w && (uintptr_t)dst & 15)
2868 {
2869 s = *src++;
2870 *dst = convert_8888_to_0565 (s);
2871 dst++;
2872 w--;
2873 }
2874
2875 while (w >= 8)
2876 {
2877 __m128i xmm_src0 = load_128_unaligned ((__m128i *)src + 0);
2878 __m128i xmm_src1 = load_128_unaligned ((__m128i *)src + 1);
2879
2880 save_128_aligned ((__m128i*)dst, pack_565_2packedx128_128 (xmm_src0, xmm_src1));
2881
2882 w -= 8;
2883 src += 8;
2884 dst += 8;
2885 }
2886
2887 while (w)
2888 {
2889 s = *src++;
2890 *dst = convert_8888_to_0565 (s);
2891 dst++;
2892 w--;
2893 }
2894 }
2895}
2896
2897static void
2898sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
2899 pixman_composite_info_t *info)
2900{
2901 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
2902 uint32_t *dst_line, *dst;
2903 uint32_t *src_line, *src;
2904 int32_t w;
2905 int dst_stride, src_stride;
2906
2907
2908 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
2909 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
2910 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
2911 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
2912
2913 while (height--)
2914 {
2915 dst = dst_line;
2916 dst_line += dst_stride;
2917 src = src_line;
2918 src_line += src_stride;
2919 w = width;
2920
2921 while (w && (uintptr_t)dst & 15)
2922 {
2923 *dst++ = *src++ | 0xff000000;
2924 w--;
2925 }
2926
2927 while (w >= 16)
2928 {
2929 __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
2930
2931 xmm_src1 = load_128_unaligned ((__m128i*)src + 0);
2932 xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
2933 xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
2934 xmm_src4 = load_128_unaligned ((__m128i*)src + 3);
2935
2936 save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000));
2937 save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000));
2938 save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000));
2939 save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000));
2940
2941 dst += 16;
2942 src += 16;
2943 w -= 16;
2944 }
2945
2946 while (w)
2947 {
2948 *dst++ = *src++ | 0xff000000;
2949 w--;
2950 }
2951 }
2952
2953}
2954
2955static void
2956sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
2957 pixman_composite_info_t *info)
2958{
2959 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
2960 uint32_t *dst_line, *dst;
2961 uint32_t *src_line, *src;
2962 uint32_t mask;
2963 int dst_stride, src_stride;
2964 int32_t w;
2965
2966 __m128i xmm_mask, xmm_alpha;
2967 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
2968 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2969
2970 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
2971 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
2972 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
2973 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
2974
2975 mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
2976
2977 xmm_mask = create_mask_16_128 (mask >> 24);
2978 xmm_alpha = mask_00ff;
2979
2980 while (height--)
2981 {
2982 dst = dst_line;
2983 dst_line += dst_stride;
2984 src = src_line;
2985 src_line += src_stride;
2986 w = width;
2987
2988 while (w && (uintptr_t)dst & 15)
2989 {
2990 uint32_t s = (*src++) | 0xff000000;
2991 uint32_t d = *dst;
2992
2993 __m128i src = unpack_32_1x128 (s);
2994 __m128i alpha = xmm_alpha;
2995 __m128i mask = xmm_mask;
2996 __m128i dest = unpack_32_1x128 (d);
2997
2998 *dst++ = pack_1x128_32 (
2999 in_over_1x128 (&src, &alpha, &mask, &dest));
3000
3001 w--;
3002 }
3003
3004 while (w >= 4)
3005 {
3006 xmm_src = _mm_or_si128 (
3007 load_128_unaligned ((__m128i*)src), mask_ff000000);
3008 xmm_dst = load_128_aligned ((__m128i*)dst);
3009
3010 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3011 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
3012
3013 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
3014 &xmm_alpha, &xmm_alpha,
3015 &xmm_mask, &xmm_mask,
3016 &xmm_dst_lo, &xmm_dst_hi);
3017
3018 save_128_aligned (
3019 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3020
3021 dst += 4;
3022 src += 4;
3023 w -= 4;
3024
3025 }
3026
3027 while (w)
3028 {
3029 uint32_t s = (*src++) | 0xff000000;
3030 uint32_t d = *dst;
3031
3032 __m128i src = unpack_32_1x128 (s);
3033 __m128i alpha = xmm_alpha;
3034 __m128i mask = xmm_mask;
3035 __m128i dest = unpack_32_1x128 (d);
3036
3037 *dst++ = pack_1x128_32 (
3038 in_over_1x128 (&src, &alpha, &mask, &dest));
3039
3040 w--;
3041 }
3042 }
3043
3044}
3045
3046static void
3047sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
3048 pixman_composite_info_t *info)
3049{
3050 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
3051 int dst_stride, src_stride;
3052 uint32_t *dst_line, *dst;
3053 uint32_t *src_line, *src;
3054
3055 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
3056 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
3057 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
3058 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
3059
3060 dst = dst_line;
3061 src = src_line;
3062
3063 while (height--)
3064 {
3065 sse2_combine_over_u (imp, op, dst, src, NULL((void*)0), width);
3066
3067 dst += dst_stride;
3068 src += src_stride;
3069 }
3070}
3071
3072static force_inline__inline__ __attribute__ ((__always_inline__)) uint16_t
3073composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
3074{
3075 __m128i ms;
3076
3077 ms = unpack_32_1x128 (src);
3078 return pack_565_32_16 (
3079 pack_1x128_32 (
3080 over_1x128 (
3081 ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst))));
3082}
3083
3084static void
3085sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
3086 pixman_composite_info_t *info)
3087{
3088 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
3089 uint16_t *dst_line, *dst, d;
3090 uint32_t *src_line, *src, s;
3091 int dst_stride, src_stride;
3092 int32_t w;
3093
3094 __m128i xmm_alpha_lo, xmm_alpha_hi;
3095 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
3096 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3097
3098 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
3099 dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
3100 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
3101 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
3102
3103 while (height--)
3104 {
3105 dst = dst_line;
3106 src = src_line;
3107
3108 dst_line += dst_stride;
3109 src_line += src_stride;
3110 w = width;
3111
3112 /* Align dst on a 16-byte boundary */
3113 while (w &&
3114 ((uintptr_t)dst & 15))
3115 {
3116 s = *src++;
3117 d = *dst;
3118
3119 *dst++ = composite_over_8888_0565pixel (s, d);
3120 w--;
3121 }
3122
3123 /* It's a 8 pixel loop */
3124 while (w >= 8)
3125 {
3126 /* I'm loading unaligned because I'm not sure
3127 * about the address alignment.
3128 */
3129 xmm_src = load_128_unaligned ((__m128i*) src);
3130 xmm_dst = load_128_aligned ((__m128i*) dst);
3131
3132 /* Unpacking */
3133 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3134 unpack_565_128_4x128 (xmm_dst,
3135 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3136 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
3137 &xmm_alpha_lo, &xmm_alpha_hi);
3138
3139 /* I'm loading next 4 pixels from memory
3140 * before to optimze the memory read.
3141 */
3142 xmm_src = load_128_unaligned ((__m128i*) (src + 4));
3143
3144 over_2x128 (&xmm_src_lo, &xmm_src_hi,
3145 &xmm_alpha_lo, &xmm_alpha_hi,
3146 &xmm_dst0, &xmm_dst1);
3147
3148 /* Unpacking */
3149 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3150 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
3151 &xmm_alpha_lo, &xmm_alpha_hi);
3152
3153 over_2x128 (&xmm_src_lo, &xmm_src_hi,
3154 &xmm_alpha_lo, &xmm_alpha_hi,
3155 &xmm_dst2, &xmm_dst3);
3156
3157 save_128_aligned (
3158 (__m128i*)dst, pack_565_4x128_128 (
3159 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
3160
3161 w -= 8;
3162 dst += 8;
3163 src += 8;
3164 }
3165
3166 while (w--)
3167 {
3168 s = *src++;
3169 d = *dst;
3170
3171 *dst++ = composite_over_8888_0565pixel (s, d);
3172 }
3173 }
3174
3175}
3176
3177static void
3178sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
3179 pixman_composite_info_t *info)
3180{
3181 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
3182 uint32_t src, srca;
3183 uint32_t *dst_line, *dst;
3184 uint8_t *mask_line, *mask;
3185 int dst_stride, mask_stride;
3186 int32_t w;
3187 uint32_t d;
3188
3189 __m128i xmm_src, xmm_alpha, xmm_def;
3190 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
3191 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3192
3193 __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
3194
3195 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
3196
3197 srca = src >> 24;
3198 if (src == 0)
3199 return;
3200
3201 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
3202 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
3203 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
3204 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
3205
3206 xmm_def = create_mask_2x32_128 (src, src);
3207 xmm_src = expand_pixel_32_1x128 (src);
3208 xmm_alpha = expand_alpha_1x128 (xmm_src);
3209 mmx_src = xmm_src;
3210 mmx_alpha = xmm_alpha;
3211
3212 while (height--)
3213 {
3214 dst = dst_line;
3215 dst_line += dst_stride;
3216 mask = mask_line;
3217 mask_line += mask_stride;
3218 w = width;
3219
3220 while (w && (uintptr_t)dst & 15)
3221 {
3222 uint8_t m = *mask++;
3223
3224 if (m)
3225 {
3226 d = *dst;
3227 mmx_mask = expand_pixel_8_1x128 (m);
3228 mmx_dest = unpack_32_1x128 (d);
3229
3230 *dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
3231 &mmx_alpha,
3232 &mmx_mask,
3233 &mmx_dest));
3234 }
3235
3236 w--;
3237 dst++;
3238 }
3239
3240 while (w >= 4)
3241 {
3242 uint32_t m;
3243 memcpy(&m, mask, sizeof(uint32_t));
3244
3245 if (srca == 0xff && m == 0xffffffff)
3246 {
3247 save_128_aligned ((__m128i*)dst, xmm_def);
3248 }
3249 else if (m)
3250 {
3251 xmm_dst = load_128_aligned ((__m128i*) dst);
3252 xmm_mask = unpack_32_1x128 (m);
3253 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3254
3255 /* Unpacking */
3256 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
3257 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3258
3259 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3260 &xmm_mask_lo, &xmm_mask_hi);
3261
3262 in_over_2x128 (&xmm_src, &xmm_src,
3263 &xmm_alpha, &xmm_alpha,
3264 &xmm_mask_lo, &xmm_mask_hi,
3265 &xmm_dst_lo, &xmm_dst_hi);
3266
3267 save_128_aligned (
3268 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3269 }
3270
3271 w -= 4;
3272 dst += 4;
3273 mask += 4;
3274 }
3275
3276 while (w)
3277 {
3278 uint8_t m = *mask++;
3279
3280 if (m)
3281 {
3282 d = *dst;
3283 mmx_mask = expand_pixel_8_1x128 (m);
3284 mmx_dest = unpack_32_1x128 (d);
3285
3286 *dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
3287 &mmx_alpha,
3288 &mmx_mask,
3289 &mmx_dest));
3290 }
3291
3292 w--;
3293 dst++;
3294 }
3295 }
3296
3297}
3298
3299#if defined(__GNUC__4) && !defined(__x86_64__1) && !defined(__amd64__1)
3300__attribute__((__force_align_arg_pointer__))
3301#endif
3302static pixman_bool_t
3303sse2_fill (pixman_implementation_t *imp,
3304 uint32_t * bits,
3305 int stride,
3306 int bpp,
3307 int x,
3308 int y,
3309 int width,
3310 int height,
3311 uint32_t filler)
3312{
3313 uint32_t byte_width;
3314 uint8_t *byte_line;
3315
3316 __m128i xmm_def;
3317
3318 if (bpp == 8)
3319 {
3320 uint32_t b;
3321 uint32_t w;
3322
3323 stride = stride * (int) sizeof (uint32_t) / 1;
3324 byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
3325 byte_width = width;
3326 stride *= 1;
3327
3328 b = filler & 0xff;
3329 w = (b << 8) | b;
3330 filler = (w << 16) | w;
3331 }
3332 else if (bpp == 16)
3333 {
3334 stride = stride * (int) sizeof (uint32_t) / 2;
3335 byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
3336 byte_width = 2 * width;
3337 stride *= 2;
3338
3339 filler = (filler & 0xffff) * 0x00010001;
3340 }
3341 else if (bpp == 32)
3342 {
3343 stride = stride * (int) sizeof (uint32_t) / 4;
3344 byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
3345 byte_width = 4 * width;
3346 stride *= 4;
3347 }
3348 else
3349 {
3350 return FALSE0;
3351 }
3352
3353 xmm_def = create_mask_2x32_128 (filler, filler);
3354
3355 while (height--)
3356 {
3357 int w;
3358 uint8_t *d = byte_line;
3359 byte_line += stride;
3360 w = byte_width;
3361
3362 if (w >= 1 && ((uintptr_t)d & 1))
3363 {
3364 *(uint8_t *)d = filler;
3365 w -= 1;
3366 d += 1;
3367 }
3368
3369 while (w >= 2 && ((uintptr_t)d & 3))
3370 {
3371 *(uint16_t *)d = filler;
3372 w -= 2;
3373 d += 2;
3374 }
3375
3376 while (w >= 4 && ((uintptr_t)d & 15))
3377 {
3378 *(uint32_t *)d = filler;
3379
3380 w -= 4;
3381 d += 4;
3382 }
3383
3384 while (w >= 128)
3385 {
3386 save_128_aligned ((__m128i*)(d), xmm_def);
3387 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3388 save_128_aligned ((__m128i*)(d + 32), xmm_def);
3389 save_128_aligned ((__m128i*)(d + 48), xmm_def);
3390 save_128_aligned ((__m128i*)(d + 64), xmm_def);
3391 save_128_aligned ((__m128i*)(d + 80), xmm_def);
3392 save_128_aligned ((__m128i*)(d + 96), xmm_def);
3393 save_128_aligned ((__m128i*)(d + 112), xmm_def);
3394
3395 d += 128;
3396 w -= 128;
3397 }
3398
3399 if (w >= 64)
3400 {
3401 save_128_aligned ((__m128i*)(d), xmm_def);
3402 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3403 save_128_aligned ((__m128i*)(d + 32), xmm_def);
3404 save_128_aligned ((__m128i*)(d + 48), xmm_def);
3405
3406 d += 64;
3407 w -= 64;
3408 }
3409
3410 if (w >= 32)
3411 {
3412 save_128_aligned ((__m128i*)(d), xmm_def);
3413 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3414
3415 d += 32;
3416 w -= 32;
3417 }
3418
3419 if (w >= 16)
3420 {
3421 save_128_aligned ((__m128i*)(d), xmm_def);
3422
3423 d += 16;
3424 w -= 16;
3425 }
3426
3427 while (w >= 4)
3428 {
3429 *(uint32_t *)d = filler;
3430
3431 w -= 4;
3432 d += 4;
3433 }
3434
3435 if (w >= 2)
3436 {
3437 *(uint16_t *)d = filler;
3438 w -= 2;
3439 d += 2;
3440 }
3441
3442 if (w >= 1)
3443 {
3444 *(uint8_t *)d = filler;
3445 w -= 1;
3446 d += 1;
3447 }
3448 }
3449
3450 return TRUE1;
3451}
3452
3453static void
3454sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
3455 pixman_composite_info_t *info)
3456{
3457 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
3458 uint32_t src, srca;
3459 uint32_t *dst_line, *dst;
3460 uint8_t *mask_line, *mask;
3461 int dst_stride, mask_stride;
3462 int32_t w;
3463
3464 __m128i xmm_src, xmm_def;
3465 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3466
3467 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
3468
3469 srca = src >> 24;
3470 if (src == 0)
3471 {
3472 sse2_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride,
3473 PIXMAN_FORMAT_BPP (dest_image->bits.format)(((dest_image->bits.format >> (24)) & ((1 <<
(8)) - 1)) << ((dest_image->bits.format >> 22
) & 3))
,
3474 dest_x, dest_y, width, height, 0);
3475 return;
3476 }
3477
3478 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
3479 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
3480 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
3481 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
3482
3483 xmm_def = create_mask_2x32_128 (src, src);
3484 xmm_src = expand_pixel_32_1x128 (src);
3485
3486 while (height--)
3487 {
3488 dst = dst_line;
3489 dst_line += dst_stride;
3490 mask = mask_line;
3491 mask_line += mask_stride;
3492 w = width;
3493
3494 while (w && (uintptr_t)dst & 15)
3495 {
3496 uint8_t m = *mask++;
3497
3498 if (m)
3499 {
3500 *dst = pack_1x128_32 (
3501 pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)));
3502 }
3503 else
3504 {
3505 *dst = 0;
3506 }
3507
3508 w--;
3509 dst++;
3510 }
3511
3512 while (w >= 4)
3513 {
3514 uint32_t m;
3515 memcpy(&m, mask, sizeof(uint32_t));
3516
3517 if (srca == 0xff && m == 0xffffffff)
3518 {
3519 save_128_aligned ((__m128i*)dst, xmm_def);
3520 }
3521 else if (m)
3522 {
3523 xmm_mask = unpack_32_1x128 (m);
3524 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3525
3526 /* Unpacking */
3527 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3528
3529 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3530 &xmm_mask_lo, &xmm_mask_hi);
3531
3532 pix_multiply_2x128 (&xmm_src, &xmm_src,
3533 &xmm_mask_lo, &xmm_mask_hi,
3534 &xmm_mask_lo, &xmm_mask_hi);
3535
3536 save_128_aligned (
3537 (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
3538 }
3539 else
3540 {
3541 save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ());
3542 }
3543
3544 w -= 4;
3545 dst += 4;
3546 mask += 4;
3547 }
3548
3549 while (w)
3550 {
3551 uint8_t m = *mask++;
3552
3553 if (m)
3554 {
3555 *dst = pack_1x128_32 (
3556 pix_multiply_1x128 (
3557 xmm_src, expand_pixel_8_1x128 (m)));
3558 }
3559 else
3560 {
3561 *dst = 0;
3562 }
3563
3564 w--;
3565 dst++;
3566 }
3567 }
3568
3569}
3570
3571static void
3572sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
3573 pixman_composite_info_t *info)
3574{
3575 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
3576 uint32_t src;
3577 uint16_t *dst_line, *dst, d;
3578 uint8_t *mask_line, *mask;
3579 int dst_stride, mask_stride;
3580 int32_t w;
3581 __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
3582
3583 __m128i xmm_src, xmm_alpha;
3584 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3585 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3586
3587 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
3588
3589 if (src == 0)
3590 return;
3591
3592 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
3593 dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
3594 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
3595 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
3596
3597 xmm_src = expand_pixel_32_1x128 (src);
3598 xmm_alpha = expand_alpha_1x128 (xmm_src);
3599 mmx_src = xmm_src;
3600 mmx_alpha = xmm_alpha;
3601
3602 while (height--)
3603 {
3604 dst = dst_line;
3605 dst_line += dst_stride;
3606 mask = mask_line;
3607 mask_line += mask_stride;
3608 w = width;
3609
3610 while (w && (uintptr_t)dst & 15)
3611 {
3612 uint8_t m = *mask++;
3613
3614 if (m)
3615 {
3616 d = *dst;
3617 mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
3618 mmx_dest = expand565_16_1x128 (d);
3619
3620 *dst = pack_565_32_16 (
3621 pack_1x128_32 (
3622 in_over_1x128 (
3623 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
3624 }
3625
3626 w--;
3627 dst++;
3628 }
3629
3630 while (w >= 8)
3631 {
3632 uint32_t m;
3633
3634 xmm_dst = load_128_aligned ((__m128i*) dst);
3635 unpack_565_128_4x128 (xmm_dst,
3636 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3637
3638 memcpy(&m, mask, sizeof(uint32_t));
3639 mask += 4;
3640
3641 if (m)
3642 {
3643 xmm_mask = unpack_32_1x128 (m);
3644 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3645
3646 /* Unpacking */
3647 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3648
3649 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3650 &xmm_mask_lo, &xmm_mask_hi);
3651
3652 in_over_2x128 (&xmm_src, &xmm_src,
3653 &xmm_alpha, &xmm_alpha,
3654 &xmm_mask_lo, &xmm_mask_hi,
3655 &xmm_dst0, &xmm_dst1);
3656 }
3657
3658 memcpy(&m, mask, sizeof(uint32_t));
3659 mask += 4;
3660
3661 if (m)
3662 {
3663 xmm_mask = unpack_32_1x128 (m);
3664 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3665
3666 /* Unpacking */
3667 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3668
3669 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3670 &xmm_mask_lo, &xmm_mask_hi);
3671 in_over_2x128 (&xmm_src, &xmm_src,
3672 &xmm_alpha, &xmm_alpha,
3673 &xmm_mask_lo, &xmm_mask_hi,
3674 &xmm_dst2, &xmm_dst3);
3675 }
3676
3677 save_128_aligned (
3678 (__m128i*)dst, pack_565_4x128_128 (
3679 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
3680
3681 w -= 8;
3682 dst += 8;
3683 }
3684
3685 while (w)
3686 {
3687 uint8_t m = *mask++;
3688
3689 if (m)
3690 {
3691 d = *dst;
3692 mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
3693 mmx_dest = expand565_16_1x128 (d);
3694
3695 *dst = pack_565_32_16 (
3696 pack_1x128_32 (
3697 in_over_1x128 (
3698 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
3699 }
3700
3701 w--;
3702 dst++;
3703 }
3704 }
3705
3706}
3707
3708static void
3709sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
3710 pixman_composite_info_t *info)
3711{
3712 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
3713 uint16_t *dst_line, *dst, d;
3714 uint32_t *src_line, *src, s;
3715 int dst_stride, src_stride;
3716 int32_t w;
3717 uint32_t opaque, zero;
3718
3719 __m128i ms;
3720 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
3721 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3722
3723 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
3724 dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
3725 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
3726 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
3727
3728 while (height--)
3729 {
3730 dst = dst_line;
3731 dst_line += dst_stride;
3732 src = src_line;
3733 src_line += src_stride;
3734 w = width;
3735
3736 while (w && (uintptr_t)dst & 15)
3737 {
3738 s = *src++;
3739 d = *dst;
3740
3741 ms = unpack_32_1x128 (s);
3742
3743 *dst++ = pack_565_32_16 (
3744 pack_1x128_32 (
3745 over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
3746 w--;
3747 }
3748
3749 while (w >= 8)
3750 {
3751 /* First round */
3752 xmm_src = load_128_unaligned ((__m128i*)src);
3753 xmm_dst = load_128_aligned ((__m128i*)dst);
3754
3755 opaque = is_opaque (xmm_src);
3756 zero = is_zero (xmm_src);
3757
3758 unpack_565_128_4x128 (xmm_dst,
3759 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3760 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3761
3762 /* preload next round*/
3763 xmm_src = load_128_unaligned ((__m128i*)(src + 4));
3764
3765 if (opaque)
3766 {
3767 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
3768 &xmm_dst0, &xmm_dst1);
3769 }
3770 else if (!zero)
3771 {
3772 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
3773 &xmm_dst0, &xmm_dst1);
3774 }
3775
3776 /* Second round */
3777 opaque = is_opaque (xmm_src);
3778 zero = is_zero (xmm_src);
3779
3780 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3781
3782 if (opaque)
3783 {
3784 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
3785 &xmm_dst2, &xmm_dst3);
3786 }
3787 else if (!zero)
3788 {
3789 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
3790 &xmm_dst2, &xmm_dst3);
3791 }
3792
3793 save_128_aligned (
3794 (__m128i*)dst, pack_565_4x128_128 (
3795 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
3796
3797 w -= 8;
3798 src += 8;
3799 dst += 8;
3800 }
3801
3802 while (w)
3803 {
3804 s = *src++;
3805 d = *dst;
3806
3807 ms = unpack_32_1x128 (s);
3808
3809 *dst++ = pack_565_32_16 (
3810 pack_1x128_32 (
3811 over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
3812 w--;
3813 }
3814 }
3815
3816}
3817
3818static void
3819sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
3820 pixman_composite_info_t *info)
3821{
3822 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
3823 uint32_t *dst_line, *dst, d;
3824 uint32_t *src_line, *src, s;
3825 int dst_stride, src_stride;
3826 int32_t w;
3827 uint32_t opaque, zero;
3828
3829 __m128i xmm_src_lo, xmm_src_hi;
3830 __m128i xmm_dst_lo, xmm_dst_hi;
3831
3832 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
3833 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
3834 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
3835 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
3836
3837 while (height--)
3838 {
3839 dst = dst_line;
3840 dst_line += dst_stride;
3841 src = src_line;
3842 src_line += src_stride;
3843 w = width;
3844
3845 while (w && (uintptr_t)dst & 15)
3846 {
3847 s = *src++;
3848 d = *dst;
3849
3850 *dst++ = pack_1x128_32 (
3851 over_rev_non_pre_1x128 (
3852 unpack_32_1x128 (s), unpack_32_1x128 (d)));
3853
3854 w--;
3855 }
3856
3857 while (w >= 4)
3858 {
3859 xmm_src_hi = load_128_unaligned ((__m128i*)src);
3860
3861 opaque = is_opaque (xmm_src_hi);
3862 zero = is_zero (xmm_src_hi);
3863
3864 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
3865
3866 if (opaque)
3867 {
3868 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
3869 &xmm_dst_lo, &xmm_dst_hi);
3870
3871 save_128_aligned (
3872 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3873 }
3874 else if (!zero)
3875 {
3876 xmm_dst_hi = load_128_aligned ((__m128i*)dst);
3877
3878 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
3879
3880 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
3881 &xmm_dst_lo, &xmm_dst_hi);
3882
3883 save_128_aligned (
3884 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3885 }
3886
3887 w -= 4;
3888 dst += 4;
3889 src += 4;
3890 }
3891
3892 while (w)
3893 {
3894 s = *src++;
3895 d = *dst;
3896
3897 *dst++ = pack_1x128_32 (
3898 over_rev_non_pre_1x128 (
3899 unpack_32_1x128 (s), unpack_32_1x128 (d)));
3900
3901 w--;
3902 }
3903 }
3904
3905}
3906
3907static void
3908sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
3909 pixman_composite_info_t *info)
3910{
3911 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
3912 uint32_t src;
3913 uint16_t *dst_line, *dst, d;
3914 uint32_t *mask_line, *mask, m;
3915 int dst_stride, mask_stride;
3916 int w;
3917 uint32_t pack_cmp;
3918
3919 __m128i xmm_src, xmm_alpha;
3920 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3921 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3922
3923 __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
3924
3925 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
3926
3927 if (src == 0)
3928 return;
3929
3930 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
3931 dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
3932 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
3933 mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
3934
3935 xmm_src = expand_pixel_32_1x128 (src);
3936 xmm_alpha = expand_alpha_1x128 (xmm_src);
3937 mmx_src = xmm_src;
3938 mmx_alpha = xmm_alpha;
3939
3940 while (height--)
3941 {
3942 w = width;
3943 mask = mask_line;
3944 dst = dst_line;
3945 mask_line += mask_stride;
3946 dst_line += dst_stride;
3947
3948 while (w && ((uintptr_t)dst & 15))
3949 {
3950 m = *(uint32_t *) mask;
3951
3952 if (m)
3953 {
3954 d = *dst;
3955 mmx_mask = unpack_32_1x128 (m);
3956 mmx_dest = expand565_16_1x128 (d);
3957
3958 *dst = pack_565_32_16 (
3959 pack_1x128_32 (
3960 in_over_1x128 (
3961 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
3962 }
3963
3964 w--;
3965 dst++;
3966 mask++;
3967 }
3968
3969 while (w >= 8)
3970 {
3971 /* First round */
3972 xmm_mask = load_128_unaligned ((__m128i*)mask);
3973 xmm_dst = load_128_aligned ((__m128i*)dst);
3974
3975 pack_cmp = _mm_movemask_epi8 (
3976 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
3977
3978 unpack_565_128_4x128 (xmm_dst,
3979 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3980 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3981
3982 /* preload next round */
3983 xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
3984
3985 /* preload next round */
3986 if (pack_cmp != 0xffff)
3987 {
3988 in_over_2x128 (&xmm_src, &xmm_src,
3989 &xmm_alpha, &xmm_alpha,
3990 &xmm_mask_lo, &xmm_mask_hi,
3991 &xmm_dst0, &xmm_dst1);
3992 }
3993
3994 /* Second round */
3995 pack_cmp = _mm_movemask_epi8 (
3996 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
3997
3998 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3999
4000 if (pack_cmp != 0xffff)
4001 {
4002 in_over_2x128 (&xmm_src, &xmm_src,
4003 &xmm_alpha, &xmm_alpha,
4004 &xmm_mask_lo, &xmm_mask_hi,
4005 &xmm_dst2, &xmm_dst3);
4006 }
4007
4008 save_128_aligned (
4009 (__m128i*)dst, pack_565_4x128_128 (
4010 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
4011
4012 w -= 8;
4013 dst += 8;
4014 mask += 8;
4015 }
4016
4017 while (w)
4018 {
4019 m = *(uint32_t *) mask;
4020
4021 if (m)
4022 {
4023 d = *dst;
4024 mmx_mask = unpack_32_1x128 (m);
4025 mmx_dest = expand565_16_1x128 (d);
4026
4027 *dst = pack_565_32_16 (
4028 pack_1x128_32 (
4029 in_over_1x128 (
4030 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
4031 }
4032
4033 w--;
4034 dst++;
4035 mask++;
4036 }
4037 }
4038
4039}
4040
4041static void
4042sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
4043 pixman_composite_info_t *info)
4044{
4045 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
4046 uint8_t *dst_line, *dst;
4047 uint8_t *mask_line, *mask;
4048 int dst_stride, mask_stride;
4049 uint32_t d;
4050 uint32_t src;
4051 int32_t w;
4052
4053 __m128i xmm_alpha;
4054 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4055 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4056
4057 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
4058 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
4059 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
4060 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
4061
4062 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4063
4064 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4065
4066 while (height--)
4067 {
4068 dst = dst_line;
4069 dst_line += dst_stride;
4070 mask = mask_line;
4071 mask_line += mask_stride;
4072 w = width;
4073
4074 while (w && ((uintptr_t)dst & 15))
4075 {
4076 uint8_t m = *mask++;
4077 d = (uint32_t) *dst;
4078
4079 *dst++ = (uint8_t) pack_1x128_32 (
4080 pix_multiply_1x128 (
4081 pix_multiply_1x128 (xmm_alpha,
4082 unpack_32_1x128 (m)),
4083 unpack_32_1x128 (d)));
4084 w--;
4085 }
4086
4087 while (w >= 16)
4088 {
4089 xmm_mask = load_128_unaligned ((__m128i*)mask);
4090 xmm_dst = load_128_aligned ((__m128i*)dst);
4091
4092 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4093 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4094
4095 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4096 &xmm_mask_lo, &xmm_mask_hi,
4097 &xmm_mask_lo, &xmm_mask_hi);
4098
4099 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
4100 &xmm_dst_lo, &xmm_dst_hi,
4101 &xmm_dst_lo, &xmm_dst_hi);
4102
4103 save_128_aligned (
4104 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4105
4106 mask += 16;
4107 dst += 16;
4108 w -= 16;
4109 }
4110
4111 while (w)
4112 {
4113 uint8_t m = *mask++;
4114 d = (uint32_t) *dst;
4115
4116 *dst++ = (uint8_t) pack_1x128_32 (
4117 pix_multiply_1x128 (
4118 pix_multiply_1x128 (
4119 xmm_alpha, unpack_32_1x128 (m)),
4120 unpack_32_1x128 (d)));
4121 w--;
4122 }
4123 }
4124
4125}
4126
4127static void
4128sse2_composite_in_n_8 (pixman_implementation_t *imp,
4129 pixman_composite_info_t *info)
4130{
4131 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
4132 uint8_t *dst_line, *dst;
4133 int dst_stride;
4134 uint32_t d;
4135 uint32_t src;
4136 int32_t w;
4137
4138 __m128i xmm_alpha;
4139 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4140
4141 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
4142 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
4143
4144 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4145
4146 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4147
4148 src = src >> 24;
4149
4150 if (src == 0xff)
4151 return;
4152
4153 if (src == 0x00)
4154 {
4155 pixman_fill_moz_pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
4156 8, dest_x, dest_y, width, height, src);
4157
4158 return;
4159 }
4160
4161 while (height--)
4162 {
4163 dst = dst_line;
4164 dst_line += dst_stride;
4165 w = width;
4166
4167 while (w && ((uintptr_t)dst & 15))
4168 {
4169 d = (uint32_t) *dst;
4170
4171 *dst++ = (uint8_t) pack_1x128_32 (
4172 pix_multiply_1x128 (
4173 xmm_alpha,
4174 unpack_32_1x128 (d)));
4175 w--;
4176 }
4177
4178 while (w >= 16)
4179 {
4180 xmm_dst = load_128_aligned ((__m128i*)dst);
4181
4182 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4183
4184 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4185 &xmm_dst_lo, &xmm_dst_hi,
4186 &xmm_dst_lo, &xmm_dst_hi);
4187
4188 save_128_aligned (
4189 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4190
4191 dst += 16;
4192 w -= 16;
4193 }
4194
4195 while (w)
4196 {
4197 d = (uint32_t) *dst;
4198
4199 *dst++ = (uint8_t) pack_1x128_32 (
4200 pix_multiply_1x128 (
4201 xmm_alpha,
4202 unpack_32_1x128 (d)));
4203 w--;
4204 }
4205 }
4206
4207}
4208
4209static void
4210sse2_composite_in_8_8 (pixman_implementation_t *imp,
4211 pixman_composite_info_t *info)
4212{
4213 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
4214 uint8_t *dst_line, *dst;
4215 uint8_t *src_line, *src;
4216 int src_stride, dst_stride;
4217 int32_t w;
4218 uint32_t s, d;
4219
4220 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
4221 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4222
4223 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
4224 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
4225 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
4226 src_image, src_x, src_y, uint8_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
4227
4228 while (height--)
4229 {
4230 dst = dst_line;
4231 dst_line += dst_stride;
4232 src = src_line;
4233 src_line += src_stride;
4234 w = width;
4235
4236 while (w && ((uintptr_t)dst & 15))
4237 {
4238 s = (uint32_t) *src++;
4239 d = (uint32_t) *dst;
4240
4241 *dst++ = (uint8_t) pack_1x128_32 (
4242 pix_multiply_1x128 (
4243 unpack_32_1x128 (s), unpack_32_1x128 (d)));
4244 w--;
4245 }
4246
4247 while (w >= 16)
4248 {
4249 xmm_src = load_128_unaligned ((__m128i*)src);
4250 xmm_dst = load_128_aligned ((__m128i*)dst);
4251
4252 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
4253 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4254
4255 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
4256 &xmm_dst_lo, &xmm_dst_hi,
4257 &xmm_dst_lo, &xmm_dst_hi);
4258
4259 save_128_aligned (
4260 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4261
4262 src += 16;
4263 dst += 16;
4264 w -= 16;
4265 }
4266
4267 while (w)
4268 {
4269 s = (uint32_t) *src++;
4270 d = (uint32_t) *dst;
4271
4272 *dst++ = (uint8_t) pack_1x128_32 (
4273 pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d)));
4274 w--;
4275 }
4276 }
4277
4278}
4279
4280static void
4281sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
4282 pixman_composite_info_t *info)
4283{
4284 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
4285 uint8_t *dst_line, *dst;
4286 uint8_t *mask_line, *mask;
4287 int dst_stride, mask_stride;
4288 int32_t w;
4289 uint32_t src;
4290 uint32_t d;
4291
4292 __m128i xmm_alpha;
4293 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4294 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4295
4296 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
4297 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
4298 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
4299 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
4300
4301 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4302
4303 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4304
4305 while (height--)
4306 {
4307 dst = dst_line;
4308 dst_line += dst_stride;
4309 mask = mask_line;
4310 mask_line += mask_stride;
4311 w = width;
4312
4313 while (w && ((uintptr_t)dst & 15))
4314 {
4315 uint8_t m = *mask++;
4316 d = (uint32_t) *dst;
4317
4318 *dst++ = (uint8_t) pack_1x128_32 (
4319 _mm_adds_epu16 (
4320 pix_multiply_1x128 (
4321 xmm_alpha, unpack_32_1x128 (m)),
4322 unpack_32_1x128 (d)));
4323 w--;
4324 }
4325
4326 while (w >= 16)
4327 {
4328 xmm_mask = load_128_unaligned ((__m128i*)mask);
4329 xmm_dst = load_128_aligned ((__m128i*)dst);
4330
4331 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4332 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4333
4334 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4335 &xmm_mask_lo, &xmm_mask_hi,
4336 &xmm_mask_lo, &xmm_mask_hi);
4337
4338 xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
4339 xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
4340
4341 save_128_aligned (
4342 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4343
4344 mask += 16;
4345 dst += 16;
4346 w -= 16;
4347 }
4348
4349 while (w)
4350 {
4351 uint8_t m = (uint32_t) *mask++;
4352 d = (uint32_t) *dst;
4353
4354 *dst++ = (uint8_t) pack_1x128_32 (
4355 _mm_adds_epu16 (
4356 pix_multiply_1x128 (
4357 xmm_alpha, unpack_32_1x128 (m)),
4358 unpack_32_1x128 (d)));
4359
4360 w--;
4361 }
4362 }
4363
4364}
4365
4366static void
4367sse2_composite_add_n_8 (pixman_implementation_t *imp,
4368 pixman_composite_info_t *info)
4369{
4370 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
4371 uint8_t *dst_line, *dst;
4372 int dst_stride;
4373 int32_t w;
4374 uint32_t src;
4375
4376 __m128i xmm_src;
4377
4378 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
4379 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
4380
4381 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4382
4383 src >>= 24;
4384
4385 if (src == 0x00)
4386 return;
4387
4388 if (src == 0xff)
4389 {
4390 pixman_fill_moz_pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
4391 8, dest_x, dest_y, width, height, 0xff);
4392
4393 return;
4394 }
4395
4396 src = (src << 24) | (src << 16) | (src << 8) | src;
4397 xmm_src = _mm_set_epi32 (src, src, src, src);
4398
4399 while (height--)
4400 {
4401 dst = dst_line;
4402 dst_line += dst_stride;
4403 w = width;
4404
4405 while (w && ((uintptr_t)dst & 15))
4406 {
4407 *dst = (uint8_t)_mm_cvtsi128_si32 (
4408 _mm_adds_epu8 (
4409 xmm_src,
4410 _mm_cvtsi32_si128 (*dst)));
4411
4412 w--;
4413 dst++;
4414 }
4415
4416 while (w >= 16)
4417 {
4418 save_128_aligned (
4419 (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
4420
4421 dst += 16;
4422 w -= 16;
4423 }
4424
4425 while (w)
4426 {
4427 *dst = (uint8_t)_mm_cvtsi128_si32 (
4428 _mm_adds_epu8 (
4429 xmm_src,
4430 _mm_cvtsi32_si128 (*dst)));
4431
4432 w--;
4433 dst++;
4434 }
4435 }
4436
4437}
4438
4439static void
4440sse2_composite_add_8_8 (pixman_implementation_t *imp,
4441 pixman_composite_info_t *info)
4442{
4443 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
4444 uint8_t *dst_line, *dst;
4445 uint8_t *src_line, *src;
4446 int dst_stride, src_stride;
4447 int32_t w;
4448 uint16_t t;
4449
4450 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
4451 src_image, src_x, src_y, uint8_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
4452 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
4453 dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
4454
4455 while (height--)
4456 {
4457 dst = dst_line;
4458 src = src_line;
4459
4460 dst_line += dst_stride;
4461 src_line += src_stride;
4462 w = width;
4463
4464 /* Small head */
4465 while (w && (uintptr_t)dst & 3)
4466 {
4467 t = (*dst) + (*src++);
4468 *dst++ = t | (0 - (t >> 8));
4469 w--;
4470 }
4471
4472 sse2_combine_add_u (imp, op,
4473 (uint32_t*)dst, (uint32_t*)src, NULL((void*)0), w >> 2);
4474
4475 /* Small tail */
4476 dst += w & 0xfffc;
4477 src += w & 0xfffc;
4478
4479 w &= 3;
4480
4481 while (w)
4482 {
4483 t = (*dst) + (*src++);
4484 *dst++ = t | (0 - (t >> 8));
4485 w--;
4486 }
4487 }
4488
4489}
4490
4491static void
4492sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
4493 pixman_composite_info_t *info)
4494{
4495 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
4496 uint32_t *dst_line, *dst;
4497 uint32_t *src_line, *src;
4498 int dst_stride, src_stride;
4499
4500 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
4501 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
4502 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
4503 dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
4504
4505 while (height--)
4506 {
4507 dst = dst_line;
4508 dst_line += dst_stride;
4509 src = src_line;
4510 src_line += src_stride;
4511
4512 sse2_combine_add_u (imp, op, dst, src, NULL((void*)0), width);
4513 }
4514}
4515
4516static void
4517sse2_composite_add_n_8888 (pixman_implementation_t *imp,
4518 pixman_composite_info_t *info)
4519{
4520 PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height
;
4521 uint32_t *dst_line, *dst, src;
4522 int dst_stride;
4523
4524 __m128i xmm_src;
4525
4526 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image
->bits.bits; __stride__ = dest_image->bits.rowstride; (
dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride
) * (dest_y) + (1) * (dest_x); } while (0)
;
4527
4528 src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
4529 if (src == 0)
4530 return;
4531
4532 if (src == ~0)
4533 {
4534 pixman_fill_moz_pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32,
4535 dest_x, dest_y, width, height, ~0);
4536
4537 return;
4538 }
4539
4540 xmm_src = _mm_set_epi32 (src, src, src, src);
4541 while (height--)
4542 {
4543 int w = width;
4544 uint32_t d;
4545
4546 dst = dst_line;
4547 dst_line += dst_stride;
4548
4549 while (w && (uintptr_t)dst & 15)
4550 {
4551 d = *dst;
4552 *dst++ =
4553 _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d)));
4554 w--;
4555 }
4556
4557 while (w >= 4)
4558 {
4559 save_128_aligned
4560 ((__m128i*)dst,
4561 _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
4562
4563 dst += 4;
4564 w -= 4;
4565 }
4566
4567 while (w--)
4568 {
4569 d = *dst;
4570 *dst++ =
4571 _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src,
4572 _mm_cvtsi32_si128 (d)));
4573 }
4574 }
4575}
4576
/*
 * ADD operator: solid 32-bit source, a8 mask, 8888 destination.
 * Each destination pixel receives (src IN mask) saturate-added to it.
 *
 * NOTE(review): reconstructed from an analyzer dump; macro invocations
 * (PIXMAN_COMPOSITE_ARGS, PIXMAN_IMAGE_GET_LINE) replace their inlined
 * expansions, token-for-token otherwise identical.
 */
static void
sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
                             pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line, *dst;
    uint8_t *mask_line, *mask;
    int dst_stride, mask_stride;
    int32_t w;
    uint32_t src;

    __m128i xmm_src;

    /* Resolve the solid source colour; a fully-zero source adds nothing,
     * so the whole composite is a no-op. */
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    if (src == 0)
        return;
    xmm_src = expand_pixel_32_1x128 (src);

    PIXMAN_IMAGE_GET_LINE (
        dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (
        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        /* Head: single pixels until dst reaches 16-byte alignment, as
         * required by the aligned SSE2 loads/stores in the main loop. */
        while (w && ((uintptr_t)dst & 15))
        {
            uint8_t m = *mask++;
            if (m)
            {
                *dst = pack_1x128_32
                    (_mm_adds_epu16
                     (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
                      unpack_32_1x128 (*dst)));
            }
            dst++;
            w--;
        }

        /* Main loop: four pixels / four mask bytes per iteration.
         * memcpy avoids an unaligned uint32_t read through the mask
         * pointer.  m == 0 means all four mask bytes are zero, so the
         * destination is untouched and the SIMD work is skipped. */
        while (w >= 4)
        {
            uint32_t m;
            memcpy(&m, mask, sizeof(uint32_t));

            if (m)
            {
                __m128i xmm_mask_lo, xmm_mask_hi;
                __m128i xmm_dst_lo, xmm_dst_hi;

                __m128i xmm_dst = load_128_aligned ((__m128i*)dst);
                __m128i xmm_mask =
                    _mm_unpacklo_epi8 (unpack_32_1x128(m),
                                       _mm_setzero_si128 ());

                unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
                unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);

                /* Broadcast each mask byte across its pixel's channels,
                 * then multiply into the solid source. */
                expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
                                        &xmm_mask_lo, &xmm_mask_hi);

                pix_multiply_2x128 (&xmm_src, &xmm_src,
                                    &xmm_mask_lo, &xmm_mask_hi,
                                    &xmm_mask_lo, &xmm_mask_hi);

                /* Saturating add keeps each channel clamped at 0xff. */
                xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
                xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);

                save_128_aligned (
                    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
            }

            w -= 4;
            dst += 4;
            mask += 4;
        }

        /* Tail: remaining (< 4) pixels, same scalar path as the head. */
        while (w)
        {
            uint8_t m = *mask++;
            if (m)
            {
                *dst = pack_1x128_32
                    (_mm_adds_epu16
                     (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
                      unpack_32_1x128 (*dst)));
            }
            dst++;
            w--;
        }
    }
}
4674
/*
 * Raw rectangle blit between two bitmaps of equal depth (16 or 32 bpp).
 * Returns FALSE (so a fallback path can run) when the depths differ or
 * are unsupported; TRUE once the copy has been performed.
 *
 * Strides arrive in uint32_t units and are rescaled to pixel units for
 * the start-address computation, then to bytes for row stepping.
 *
 * NOTE(review): reconstructed from an analyzer dump; tokens are
 * unchanged, only formatting/comments restored.
 */
static pixman_bool_t
sse2_blt (pixman_implementation_t *imp,
          uint32_t *               src_bits,
          uint32_t *               dst_bits,
          int                      src_stride,
          int                      dst_stride,
          int                      src_bpp,
          int                      dst_bpp,
          int                      src_x,
          int                      src_y,
          int                      dest_x,
          int                      dest_y,
          int                      width,
          int                      height)
{
    uint8_t *src_bytes;
    uint8_t *dst_bytes;
    int byte_width;

    if (src_bpp != dst_bpp)
        return FALSE;

    if (src_bpp == 16)
    {
        src_stride = src_stride * (int) sizeof (uint32_t) / 2;
        dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
        src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
        dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
        byte_width = 2 * width;
        src_stride *= 2;
        dst_stride *= 2;
    }
    else if (src_bpp == 32)
    {
        src_stride = src_stride * (int) sizeof (uint32_t) / 4;
        dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
        src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
        dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
        byte_width = 4 * width;
        src_stride *= 4;
        dst_stride *= 4;
    }
    else
    {
        return FALSE;
    }

    while (height--)
    {
        int w;
        uint8_t *s = src_bytes;
        uint8_t *d = dst_bytes;
        src_bytes += src_stride;
        dst_bytes += dst_stride;
        w = byte_width;

        /* Scalar head: advance d to 4-byte, then 16-byte alignment.
         * memmove is used because source and destination rows may
         * overlap within the same image -- presumably why plain memcpy
         * is avoided here; verify against callers before changing. */
        while (w >= 2 && ((uintptr_t)d & 3))
        {
            memmove(d, s, 2);
            w -= 2;
            s += 2;
            d += 2;
        }

        while (w >= 4 && ((uintptr_t)d & 15))
        {
            memmove(d, s, 4);

            w -= 4;
            s += 4;
            d += 4;
        }

        /* Bulk copy: 64 bytes per iteration, unaligned loads (s has no
         * alignment guarantee) paired with aligned stores (d does). */
        while (w >= 64)
        {
            __m128i xmm0, xmm1, xmm2, xmm3;

            xmm0 = load_128_unaligned ((__m128i*)(s));
            xmm1 = load_128_unaligned ((__m128i*)(s + 16));
            xmm2 = load_128_unaligned ((__m128i*)(s + 32));
            xmm3 = load_128_unaligned ((__m128i*)(s + 48));

            save_128_aligned ((__m128i*)(d), xmm0);
            save_128_aligned ((__m128i*)(d + 16), xmm1);
            save_128_aligned ((__m128i*)(d + 32), xmm2);
            save_128_aligned ((__m128i*)(d + 48), xmm3);

            s += 64;
            d += 64;
            w -= 64;
        }

        while (w >= 16)
        {
            save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );

            w -= 16;
            d += 16;
            s += 16;
        }

        /* Scalar tail: at most one 4-byte and one 2-byte remainder. */
        while (w >= 4)
        {
            memmove(d, s, 4);

            w -= 4;
            s += 4;
            d += 4;
        }

        if (w >= 2)
        {
            memmove(d, s, 2);
            w -= 2;
            s += 2;
            d += 2;
        }
    }

    return TRUE;
}
4796
/*
 * SRC operator, no mask: a straight rectangle copy.  Thin wrapper that
 * forwards the composite parameters to sse2_blt; the blit's boolean
 * result is deliberately ignored (per-pixel depths were matched by the
 * fast-path table before this function is selected -- TODO confirm).
 */
static void
sse2_composite_copy_area (pixman_implementation_t *imp,
                          pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    sse2_blt (imp, src_image->bits.bits,
              dest_image->bits.bits,
              src_image->bits.rowstride,
              dest_image->bits.rowstride,
              PIXMAN_FORMAT_BPP (src_image->bits.format),
              PIXMAN_FORMAT_BPP (dest_image->bits.format),
              src_x, src_y, dest_x, dest_y, width, height);
}
4810
/*
 * OVER operator: x8r8g8b8 source, a8 mask, 8888 destination.  The
 * source's unused alpha byte is forced to 0xff (0xff000000 | s), which
 * makes the source effectively opaque; the mask byte then selects how
 * much of it covers the destination.
 *
 * NOTE(review): reconstructed from an analyzer dump; macro invocations
 * replace their inlined expansions, tokens otherwise identical.
 */
static void
sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
                                 pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *src, *src_line, s;
    uint32_t *dst, *dst_line, d;
    uint8_t *mask, *mask_line;
    int src_stride, mask_stride, dst_stride;
    int32_t w;
    __m128i ms;

    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;

    PIXMAN_IMAGE_GET_LINE (
        dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (
        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    PIXMAN_IMAGE_GET_LINE (
        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        src = src_line;
        src_line += src_stride;
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;

        w = width;

        /* Head: scalar pixels until dst is 16-byte aligned.  An opaque
         * mask byte (0xff) lets the opaque source be stored directly;
         * otherwise do the full in_over blend in one 128-bit lane. */
        while (w && (uintptr_t)dst & 15)
        {
            uint8_t m = *mask++;
            s = 0xff000000 | *src++;
            d = *dst;
            ms = unpack_32_1x128 (s);

            if (m != 0xff)
            {
                __m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
                __m128i md = unpack_32_1x128 (d);

                ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md);
            }

            *dst++ = pack_1x128_32 (ms);
            w--;
        }

        /* Main loop: four pixels at a time.  m == 0xffffffff means all
         * four mask bytes are opaque, so the (alpha-forced) source can
         * be stored without reading the destination. */
        while (w >= 4)
        {
            uint32_t m;
            memcpy(&m, mask, sizeof(uint32_t));
            xmm_src = _mm_or_si128 (
                load_128_unaligned ((__m128i*)src), mask_ff000000);

            if (m == 0xffffffff)
            {
                save_128_aligned ((__m128i*)dst, xmm_src);
            }
            else
            {
                xmm_dst = load_128_aligned ((__m128i*)dst);

                xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());

                unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
                unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
                unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);

                expand_alpha_rev_2x128 (
                    xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);

                /* Source alpha is the constant 0x00ff per channel since
                 * the x888 source was forced opaque above. */
                in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
                               &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi,
                               &xmm_dst_lo, &xmm_dst_hi);

                save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
            }

            src += 4;
            dst += 4;
            mask += 4;
            w -= 4;
        }

        /* Tail: remaining pixels; a zero mask byte leaves dst alone. */
        while (w)
        {
            uint8_t m = *mask++;

            if (m)
            {
                s = 0xff000000 | *src;

                if (m == 0xff)
                {
                    *dst = s;
                }
                else
                {
                    __m128i ma, md, ms;

                    d = *dst;

                    ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
                    md = unpack_32_1x128 (d);
                    ms = unpack_32_1x128 (s);

                    *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md));
                }

            }

            src++;
            dst++;
            w--;
        }
    }

}
4935
/*
 * OVER operator: a8r8g8b8 source, a8 mask, 8888 destination.
 * dst = (src IN mask) OVER dst.  Unlike the x888 variant, the source
 * alpha is real and must be expanded per pixel for the blend.
 *
 * NOTE(review): reconstructed from an analyzer dump; macro invocations
 * replace their inlined expansions, tokens otherwise identical.
 */
static void
sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
                                 pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *src, *src_line, s;
    uint32_t *dst, *dst_line, d;
    uint8_t *mask, *mask_line;
    int src_stride, mask_stride, dst_stride;
    int32_t w;

    __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;

    PIXMAN_IMAGE_GET_LINE (
        dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (
        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    PIXMAN_IMAGE_GET_LINE (
        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        src = src_line;
        src_line += src_stride;
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;

        w = width;

        /* Head: scalar pixels until dst is 16-byte aligned.  Opaque
         * source under an opaque mask is a plain store; otherwise do
         * the full in_over blend in one 128-bit lane. */
        while (w && (uintptr_t)dst & 15)
        {
            uint32_t sa;
            uint8_t m = *mask++;

            s = *src++;
            d = *dst;

            sa = s >> 24;

            if (m)
            {
                if (sa == 0xff && m == 0xff)
                {
                    *dst = s;
                }
                else
                {
                    __m128i ms, md, ma, msa;

                    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
                    ms = unpack_32_1x128 (s);
                    md = unpack_32_1x128 (d);

                    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));

                    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
                }
            }

            dst++;
            w--;
        }

        /* Main loop: four pixels at a time.  The fast store needs both
         * a fully-opaque mask word and a fully-opaque source vector. */
        while (w >= 4)
        {
            uint32_t m;
            memcpy(&m, mask, sizeof(uint32_t));

            if (m)
            {
                xmm_src = load_128_unaligned ((__m128i*)src);

                if (m == 0xffffffff && is_opaque (xmm_src))
                {
                    save_128_aligned ((__m128i *)dst, xmm_src);
                }
                else
                {
                    xmm_dst = load_128_aligned ((__m128i *)dst);

                    xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());

                    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
                    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
                    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);

                    /* Real per-pixel source alpha this time. */
                    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
                    expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);

                    in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
                                   &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);

                    save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
                }
            }

            src += 4;
            dst += 4;
            mask += 4;
            w -= 4;
        }

        /* Tail: remaining pixels, same scalar path as the head. */
        while (w)
        {
            uint32_t sa;
            uint8_t m = *mask++;

            s = *src++;
            d = *dst;

            sa = s >> 24;

            if (m)
            {
                if (sa == 0xff && m == 0xff)
                {
                    *dst = s;
                }
                else
                {
                    __m128i ms, md, ma, msa;

                    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
                    ms = unpack_32_1x128 (s);
                    md = unpack_32_1x128 (d);

                    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));

                    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
                }
            }

            dst++;
            w--;
        }
    }

}
5078
/*
 * OVER_REVERSE operator with a solid source: dst = dst OVER src.
 * The destination acts as the "source" of the over_* helpers and the
 * solid colour as the backdrop, so dst's own alpha drives the blend.
 *
 * NOTE(review): reconstructed from an analyzer dump; macro invocations
 * replace their inlined expansions, tokens otherwise identical.
 */
static void
sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src;
    uint32_t *dst_line, *dst;
    __m128i xmm_src;
    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
    __m128i xmm_dsta_hi, xmm_dsta_lo;
    int dst_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    /* A zero backdrop leaves dst OVER dst... actually dst unchanged
     * underneath -- nothing to composite. */
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (
        dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);

    xmm_src = expand_pixel_32_1x128 (src);

    while (height--)
    {
        dst = dst_line;

        dst_line += dst_stride;
        w = width;

        /* Head: scalar pixels until dst is 16-byte aligned. */
        while (w && (uintptr_t)dst & 15)
        {
            __m128i vd;

            vd = unpack_32_1x128 (*dst);

            *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
                                              xmm_src));
            w--;
            dst++;
        }

        /* Main loop: four pixels at a time.  xmm_src is copied into
         * tmp_lo/tmp_hi because over_2x128 writes its last operands. */
        while (w >= 4)
        {
            __m128i tmp_lo, tmp_hi;

            xmm_dst = load_128_aligned ((__m128i*)dst);

            unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
            expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi);

            tmp_lo = xmm_src;
            tmp_hi = xmm_src;

            over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
                        &xmm_dsta_lo, &xmm_dsta_hi,
                        &tmp_lo, &tmp_hi);

            save_128_aligned (
                (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi));

            w -= 4;
            dst += 4;
        }

        /* Tail: remaining pixels, same scalar path as the head. */
        while (w)
        {
            __m128i vd;

            vd = unpack_32_1x128 (*dst);

            *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
                                              xmm_src));
            w--;
            dst++;
        }

    }

}
5159
/*
 * OVER operator: a8r8g8b8 source, a8r8g8b8 mask (only its alpha byte is
 * used -- component-alpha is not handled here), 8888 destination.
 * dst = (src IN mask.a) OVER dst.
 *
 * NOTE(review): reconstructed from an analyzer dump; macro invocations
 * replace their inlined expansions, tokens otherwise identical.
 */
static void
sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *src, *src_line, s;
    uint32_t *dst, *dst_line, d;
    uint32_t *mask, *mask_line;
    uint32_t m;
    int src_stride, mask_stride, dst_stride;
    int32_t w;

    __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;

    PIXMAN_IMAGE_GET_LINE (
        dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (
        mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
    PIXMAN_IMAGE_GET_LINE (
        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        src = src_line;
        src_line += src_stride;
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;

        w = width;

        /* Head: scalar pixels until dst is 16-byte aligned; the mask
         * pixel's alpha byte (>> 24) is the per-pixel coverage. */
        while (w && (uintptr_t)dst & 15)
        {
            uint32_t sa;

            s = *src++;
            m = (*mask++) >> 24;
            d = *dst;

            sa = s >> 24;

            if (m)
            {
                if (sa == 0xff && m == 0xff)
                {
                    *dst = s;
                }
                else
                {
                    __m128i ms, md, ma, msa;

                    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
                    ms = unpack_32_1x128 (s);
                    md = unpack_32_1x128 (d);

                    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));

                    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
                }
            }

            dst++;
            w--;
        }

        /* Main loop: four pixels at a time.  Vector classification of
         * the mask (fully transparent / fully opaque) lets the common
         * cases skip the blend entirely. */
        while (w >= 4)
        {
            xmm_mask = load_128_unaligned ((__m128i*)mask);

            if (!is_transparent (xmm_mask))
            {
                xmm_src = load_128_unaligned ((__m128i*)src);

                if (is_opaque (xmm_mask) && is_opaque (xmm_src))
                {
                    save_128_aligned ((__m128i *)dst, xmm_src);
                }
                else
                {
                    xmm_dst = load_128_aligned ((__m128i *)dst);

                    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
                    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
                    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);

                    /* Both source alpha and mask alpha are expanded
                     * from their in-pixel alpha position. */
                    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
                    expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);

                    in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
                                   &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);

                    save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
                }
            }

            src += 4;
            dst += 4;
            mask += 4;
            w -= 4;
        }

        /* Tail: remaining pixels, same scalar path as the head. */
        while (w)
        {
            uint32_t sa;

            s = *src++;
            m = (*mask++) >> 24;
            d = *dst;

            sa = s >> 24;

            if (m)
            {
                if (sa == 0xff && m == 0xff)
                {
                    *dst = s;
                }
                else
                {
                    __m128i ms, md, ma, msa;

                    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
                    ms = unpack_32_1x128 (s);
                    md = unpack_32_1x128 (d);

                    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));

                    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
                }
            }

            dst++;
            w--;
        }
    }

}
5300
/* A variant of 'sse2_combine_over_u' with minor tweaks */
/*
 * Nearest-neighbour scaled OVER scanline: walks the source with a
 * 16.16 fixed-point coordinate (vx, stepped by unit_x and wrapped by
 * src_width_fixed for repeat) and composites each fetched source pixel
 * OVER the destination.  pm stays NULL throughout, so combine1/combine4
 * take their unmasked path.
 *
 * NOTE(review): reconstructed from an analyzer dump; tokens unchanged,
 * only formatting/comments restored.
 */
static force_inline void
scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
                                             const uint32_t* ps,
                                             int32_t         w,
                                             pixman_fixed_t  vx,
                                             pixman_fixed_t  unit_x,
                                             pixman_fixed_t  src_width_fixed,
                                             pixman_bool_t   fully_transparent_src)
{
    uint32_t s, d;
    const uint32_t* pm = NULL;

    __m128i xmm_dst_lo, xmm_dst_hi;
    __m128i xmm_src_lo, xmm_src_hi;
    __m128i xmm_alpha_lo, xmm_alpha_hi;

    if (fully_transparent_src)
        return;

    /* Align dst on a 16-byte boundary */
    while (w && ((uintptr_t)pd & 15))
    {
        d = *pd;
        s = combine1 (ps + pixman_fixed_to_int (vx), pm);
        vx += unit_x;
        /* Wrap vx back into the source width for repeat -- presumably
         * vx starts below zero for the normal-repeat setup; a loop (not
         * a single subtraction) handles unit_x > src_width_fixed. */
        while (vx >= 0)
            vx -= src_width_fixed;

        *pd++ = core_combine_over_u_pixel_sse2 (s, d);
        if (pm)
            pm++;
        w--;
    }

    /* Main loop: gather four nearest-neighbour source pixels into one
     * vector, then branch on full opacity / full transparency before
     * doing the general OVER blend. */
    while (w >= 4)
    {
        __m128i tmp;
        uint32_t tmp1, tmp2, tmp3, tmp4;

        tmp1 = *(ps + pixman_fixed_to_int (vx));
        vx += unit_x;
        while (vx >= 0)
            vx -= src_width_fixed;
        tmp2 = *(ps + pixman_fixed_to_int (vx));
        vx += unit_x;
        while (vx >= 0)
            vx -= src_width_fixed;
        tmp3 = *(ps + pixman_fixed_to_int (vx));
        vx += unit_x;
        while (vx >= 0)
            vx -= src_width_fixed;
        tmp4 = *(ps + pixman_fixed_to_int (vx));
        vx += unit_x;
        while (vx >= 0)
            vx -= src_width_fixed;

        tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);

        xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);

        if (is_opaque (xmm_src_hi))
        {
            /* Opaque source: straight store, no dst read needed. */
            save_128_aligned ((__m128i*)pd, xmm_src_hi);
        }
        else if (!is_zero (xmm_src_hi))
        {
            xmm_dst_hi = load_128_aligned ((__m128i*) pd);

            unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
            unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);

            expand_alpha_2x128 (
                xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);

            over_2x128 (&xmm_src_lo, &xmm_src_hi,
                        &xmm_alpha_lo, &xmm_alpha_hi,
                        &xmm_dst_lo, &xmm_dst_hi);

            /* rebuid the 4 pixel data and save*/
            save_128_aligned ((__m128i*)pd,
                              pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
        }

        w -= 4;
        pd += 4;
        if (pm)
            pm += 4;
    }

    /* Tail: remaining (< 4) pixels, same scalar path as the head. */
    while (w)
    {
        d = *pd;
        s = combine1 (ps + pixman_fixed_to_int (vx), pm);
        vx += unit_x;
        while (vx >= 0)
            vx -= src_width_fixed;

        *pd++ = core_combine_over_u_pixel_sse2 (s, d);
        if (pm)
            pm++;

        w--;
    }
}
5406
5407FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.height) << 16)); repeat
(PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD
, &y, src_image->bits.height); src = src_first_line + src_stride
* y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5408 scaled_nearest_scanline_sse2_8888_8888_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.height) << 16)); repeat
(PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD
, &y, src_image->bits.height); src = src_first_line + src_stride
* y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5409 uint32_t, uint32_t, COVER)static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.height) << 16)); repeat
(PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD
, &y, src_image->bits.height); src = src_first_line + src_stride
* y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5410FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_none_OVER (
pixman_implementation_t *imp, pixman_composite_info_t *info) {
__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5411 scaled_nearest_scanline_sse2_8888_8888_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_none_OVER (
pixman_implementation_t *imp, pixman_composite_info_t *info) {
__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5412 uint32_t, uint32_t, NONE)static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_none_OVER (
pixman_implementation_t *imp, pixman_composite_info_t *info) {
__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5413FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER (
pixman_implementation_t *imp, pixman_composite_info_t *info) {
__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5414 scaled_nearest_scanline_sse2_8888_8888_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER (
pixman_implementation_t *imp, pixman_composite_info_t *info) {
__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5415 uint32_t, uint32_t, PAD)static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER (
pixman_implementation_t *imp, pixman_composite_info_t *info) {
__attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5416FAST_NEAREST_MAINLOOP (sse2_8888_8888_normal_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { max_vy =
((pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL ==
PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 };
if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5417 scaled_nearest_scanline_sse2_8888_8888_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { max_vy =
((pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL ==
PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 };
if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5418 uint32_t, uint32_t, NORMAL)static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t
w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx
, pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER
(dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static
void fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { max_vy =
((pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (0 && !0) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL ==
PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 };
if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5419
5420static force_inline__inline__ __attribute__ ((__always_inline__)) void
5421scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
5422 uint32_t * dst,
5423 const uint32_t * src,
5424 int32_t w,
5425 pixman_fixed_t vx,
5426 pixman_fixed_t unit_x,
5427 pixman_fixed_t src_width_fixed,
5428 pixman_bool_t zero_src)
5429{
5430 __m128i xmm_mask;
5431 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
5432 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5433 __m128i xmm_alpha_lo, xmm_alpha_hi;
5434
5435 if (zero_src || (*mask >> 24) == 0)
5436 return;
5437
5438 xmm_mask = create_mask_16_128 (*mask >> 24);
5439
5440 while (w && (uintptr_t)dst & 15)
5441 {
5442 uint32_t s = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16)));
5443 vx += unit_x;
5444 while (vx >= 0)
5445 vx -= src_width_fixed;
5446
5447 if (s)
5448 {
5449 uint32_t d = *dst;
5450
5451 __m128i ms = unpack_32_1x128 (s);
5452 __m128i alpha = expand_alpha_1x128 (ms);
5453 __m128i dest = xmm_mask;
5454 __m128i alpha_dst = unpack_32_1x128 (d);
5455
5456 *dst = pack_1x128_32 (
5457 in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
5458 }
5459 dst++;
5460 w--;
5461 }
5462
5463 while (w >= 4)
5464 {
5465 uint32_t tmp1, tmp2, tmp3, tmp4;
5466
5467 tmp1 = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16)));
5468 vx += unit_x;
5469 while (vx >= 0)
5470 vx -= src_width_fixed;
5471 tmp2 = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16)));
5472 vx += unit_x;
5473 while (vx >= 0)
5474 vx -= src_width_fixed;
5475 tmp3 = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16)));
5476 vx += unit_x;
5477 while (vx >= 0)
5478 vx -= src_width_fixed;
5479 tmp4 = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16)));
5480 vx += unit_x;
5481 while (vx >= 0)
5482 vx -= src_width_fixed;
5483
5484 xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
5485
5486 if (!is_zero (xmm_src))
5487 {
5488 xmm_dst = load_128_aligned ((__m128i*)dst);
5489
5490 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5491 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5492 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
5493 &xmm_alpha_lo, &xmm_alpha_hi);
5494
5495 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
5496 &xmm_alpha_lo, &xmm_alpha_hi,
5497 &xmm_mask, &xmm_mask,
5498 &xmm_dst_lo, &xmm_dst_hi);
5499
5500 save_128_aligned (
5501 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5502 }
5503
5504 dst += 4;
5505 w -= 4;
5506 }
5507
5508 while (w)
5509 {
5510 uint32_t s = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16)));
5511 vx += unit_x;
5512 while (vx >= 0)
5513 vx -= src_width_fixed;
5514
5515 if (s)
5516 {
5517 uint32_t d = *dst;
5518
5519 __m128i ms = unpack_32_1x128 (s);
5520 __m128i alpha = expand_alpha_1x128 (ms);
5521 __m128i mask = xmm_mask;
5522 __m128i dest = unpack_32_1x128 (d);
5523
5524 *dst = pack_1x128_32 (
5525 in_over_1x128 (&ms, &alpha, &mask, &dest));
5526 }
5527
5528 dst++;
5529 w--;
5530 }
5531
5532}
5533
5534FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.height) << 16)); repeat
(PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD
, &y, src_image->bits.height); src = src_first_line + src_stride
* y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5535 scaled_nearest_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.height) << 16)); repeat
(PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD
, &y, src_image->bits.height); src = src_first_line + src_stride
* y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5536 uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)static void fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.height) << 16)); repeat
(PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL
, &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD
, &y, src_image->bits.height); src = src_first_line + src_stride
* y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5537FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5538 scaled_nearest_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5539 uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)static void fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL)
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5540FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5541 scaled_nearest_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5542 uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)static void fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { max_vy = (
(pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE
) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >=
src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5543FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { max_vy =
((pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL ==
PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 };
if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5544 scaled_nearest_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { max_vy =
((pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL ==
PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 };
if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5545 uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)static void fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER
(pixman_implementation_t *imp, pixman_composite_info_t *info
) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__
((unused)) pixman_image_t * src_image = info->src_image; __attribute__
((unused)) pixman_image_t * mask_image = info->mask_image;
__attribute__((unused)) pixman_image_t * dest_image = info->
dest_image; __attribute__((unused)) int32_t src_x = info->
src_x; __attribute__((unused)) int32_t src_y = info->src_y
; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__
((unused)) int32_t mask_y = info->mask_y; __attribute__((unused
)) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t
dest_y = info->dest_y; __attribute__((unused)) int32_t width
= info->width; __attribute__((unused)) int32_t height = info
->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t
*src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t
) ((uint32_t) (src_image->bits.width) << 16)); pixman_fixed_t
max_vy; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t
unit_x, unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t
*dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask
; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__
; int __stride__; __bits__ = dest_image->bits.bits; __stride__
= dest_image->bits.rowstride; (dst_stride) = __stride__ *
(int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line
) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) *
(dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid
(imp, mask_image, dest_image->bits.format); else do { uint32_t
*__bits__; int __stride__; __bits__ = mask_image->bits.bits
; __stride__ = mask_image->bits.rowstride; (mask_stride) =
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y
) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int
__stride__; __bits__ = src_image->bits.bits; __stride__ =
src_image->bits.rowstride; (src_stride) = __stride__ * (int
) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line
) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0);
} while (0); v.vector[0] = ((pixman_fixed_t) ((uint32_t) (src_x
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[1] = ((pixman_fixed_t) ((uint32_t) (src_y
) << 16)) + (((pixman_fixed_t) ((uint32_t) (1) <<
16))) / 2; v.vector[2] = (((pixman_fixed_t) ((uint32_t) (1) <<
16))); if (!_moz_pixman_transform_point_3d (src_image->common
.transform, &v)) return; unit_x = src_image->common.transform
->matrix[0][0]; unit_y = src_image->common.transform->
matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1
] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]
; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { max_vy =
((pixman_fixed_t) ((uint32_t) (src_image->bits.height) <<
16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed
); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE
) { pad_repeat_get_scanline_bounds (src_image->bits.width,
vx, unit_x, &width, &left_pad, &right_pad); vx +=
left_pad * unit_x; } while (--height >= 0) { dst = dst_line
; dst_line += dst_stride; if (1 && !1) { mask = mask_line
; mask_line += mask_stride; } y = ((int) ((vy) >> 16));
vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL
) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL
== PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image
->bits.height); src = src_first_line + src_stride * y; if (
left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width - src_image->bits
.width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src
+ src_image->bits.width, right_pad, -((pixman_fixed_t) 1)
, 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL ==
PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 };
if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t
) 1), 0, src_width_fixed, 1); continue; } src = src_first_line
+ src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed
, 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image->
bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed
, 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero
+ 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1
); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER
(mask, dst, src + src_image->bits.width, width, vx - src_width_fixed
, unit_x, src_width_fixed, 0); } } }
5546
5547#if PSHUFD_IS_FAST0
5548
5549/***********************************************************************************/
5550
5551# define BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt,
wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb
, wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0,
1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 (
unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x
); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x
* 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x
* 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128
(); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx +
1), vx, -(vx + 1), vx, -(vx + 1))
\
5552 const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
5553 const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
5554 const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
5555 const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
5556 unit_x, -unit_x, unit_x, -unit_x); \
5557 const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \
5558 unit_x * 4, -unit_x * 4, \
5559 unit_x * 4, -unit_x * 4, \
5560 unit_x * 4, -unit_x * 4); \
5561 const __m128i xmm_zero = _mm_setzero_si128 (); \
5562 __m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3, \
5563 vx + unit_x * 2, -(vx + 1) - unit_x * 2, \
5564 vx + unit_x * 1, -(vx + 1) - unit_x * 1, \
5565 vx + unit_x * 0, -(vx + 1) - unit_x * 0); \
5566 __m128i xmm_wh_state;
5567
5568#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64
((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); pix = _mm_srli_epi32 (xmm_a, 7 * 2); }
while (0)
\
5569do { \
5570 int phase = phase_; \
5571 __m128i xmm_wh, xmm_a, xmm_b; \
5572 /* fetch 2x2 pixel block into sse2 registers */ \
5573 __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \
5574 __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \
5575 vx += unit_x; \
5576 /* vertical interpolation */ \
5577 xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \
5578 xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \
5579 xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \
5580 /* calculate horizontal weights */ \
5581 if (phase <= 0) \
5582 { \
5583 xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
5584 16 - BILINEAR_INTERPOLATION_BITS7)); \
5585 xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4); \
5586 phase = 0; \
5587 } \
5588 xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase, \((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(xmm_wh_state
), (int)((((phase) << 6) | ((phase) << 4) | ((phase
) << 2) | (phase)))))
5589 phase, phase))((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(xmm_wh_state
), (int)((((phase) << 6) | ((phase) << 4) | ((phase
) << 2) | (phase)))))
; \
5590 /* horizontal interpolation */ \
5591 xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(xmm_a), (int
)((((1) << 6) | ((0) << 4) | ((3) << 2) | (
2)))))
5592 xmm_a, _MM_SHUFFLE (1, 0, 3, 2))((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(xmm_a), (int
)((((1) << 6) | ((0) << 4) | ((3) << 2) | (
2)))))
, xmm_a), xmm_wh); \
5593 /* shift the result */ \
5594 pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS7 * 2); \
5595} while (0)
5596
5597#else /************************************************************************/
5598
5599# define BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt,
wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb
, wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0,
1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 (
unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x
); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x
* 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x
* 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128
(); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx +
1), vx, -(vx + 1), vx, -(vx + 1))
\
5600 const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
5601 const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
5602 const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
5603 const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
5604 unit_x, -unit_x, unit_x, -unit_x); \
5605 const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \
5606 unit_x * 4, -unit_x * 4, \
5607 unit_x * 4, -unit_x * 4, \
5608 unit_x * 4, -unit_x * 4); \
5609 const __m128i xmm_zero = _mm_setzero_si128 (); \
5610 __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \
5611 vx, -(vx + 1), vx, -(vx + 1))
5612
5613#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64
((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); pix = _mm_srli_epi32 (xmm_a, 7 * 2); }
while (0)
\
5614do { \
5615 __m128i xmm_wh, xmm_a, xmm_b; \
5616 /* fetch 2x2 pixel block into sse2 registers */ \
5617 __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \
5618 __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \
5619 (void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */ \
5620 vx += unit_x; \
5621 /* vertical interpolation */ \
5622 xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \
5623 xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \
5624 xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \
5625 /* calculate horizontal weights */ \
5626 xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
5627 16 - BILINEAR_INTERPOLATION_BITS7)); \
5628 xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \
5629 /* horizontal interpolation */ \
5630 xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a); \
5631 xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); \
5632 /* shift the result */ \
5633 pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS7 * 2); \
5634} while (0)
5635
5636/***********************************************************************************/
5637
5638#endif
5639
5640#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i
tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16
]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix
, xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix
= _mm_cvtsi128_si32 (xmm_pix); } while(0)
; \
5641do { \
5642 __m128i xmm_pix; \
5643 BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64
((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2
); } while (0)
; \
5644 xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix); \
5645 xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); \
5646 pix = _mm_cvtsi128_si32 (xmm_pix); \
5647} while(0)
5648
5649#define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix); do { __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; do { __m128i
xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i
*)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 *
2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr
= _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);
__m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx
>> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b
; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >>
16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix3 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b
; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >>
16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom
[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16
(_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16
(_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16
(xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16
(xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b
= _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16
(_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32
(xmm_a, 7 * 2); } while (0); xmm_pix1 = _mm_packs_epi32 (xmm_pix1
, xmm_pix2); xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4);
pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3); } while(0)
; \
5650do { \
5651 __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; \
5652 BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64
((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 *
2); } while (0)
; \
5653 BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64
((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32 (xmm_a, 7 *
2); } while (0)
; \
5654 BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64
((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); xmm_pix3 = _mm_srli_epi32 (xmm_a, 7 *
2); } while (0)
; \
5655 BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64
((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64
((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4;
vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr
, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8
(blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b
); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16
- 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64
( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (
xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32 (xmm_a, 7 *
2); } while (0)
; \
5656 xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2); \
5657 xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); \
5658 pix =