1 // Copyright 2014 Google Inc. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
10 // Utilities for processing transparent channel.
12 // Author: Skal (pascal.massimino@gmail.com)
16 #if defined(WEBP_USE_SSE2)
17 #include <emmintrin.h>
19 //------------------------------------------------------------------------------
// Copies the first byte of every 4-byte pixel of 'argb' into the 'alpha' plane
// and reports whether the extracted plane is fully opaque.
//
//   argb         : source pixels, 4 bytes per pixel; the byte at offset 0 of
//                  each quadruplet is the one extracted.
//   argb_stride  : distance in bytes between two consecutive source rows.
//   width/height : dimensions of the area to process, in pixels.
//   alpha        : destination plane, one byte per pixel.
//   alpha_stride : distance in bytes between two consecutive destination rows.
//
// Returns 1 if every extracted value equals 0xff, 0 otherwise.
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
                        int width, int height,
                        uint8_t* alpha, int alpha_stride) {
  // alpha_and stores an 'and' operation of all the alpha[] values. The final
  // value is not 0xff if any of the alpha[] is not equal to 0xff.
  uint32_t alpha_and = 0xff;
  int i, j;
  const __m128i a_mask = _mm_set1_epi32(0xffu);  // to preserve alpha
  // Only the low 8 bytes take part in the accumulation (eight pixels per
  // iteration); the high 8 bytes stay at zero on both sides of the final
  // comparison.
  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
  __m128i all_alphas = all_0xff;

  // The vector loop handles pixels in groups of eight and deliberately stops
  // before the last pixel of the row: that pixel (and any remainder) is left
  // to the scalar loop, so the 16-byte loads never reach past the last
  // quadruplet of the row.
  const int limit = (width - 1) & ~7;

  for (j = 0; j < height; ++j) {
    const __m128i* src = (const __m128i*)argb;
    for (i = 0; i < limit; i += 8) {
      // load 32 argb bytes (eight pixels)
      const __m128i a0 = _mm_loadu_si128(src + 0);
      const __m128i a1 = _mm_loadu_si128(src + 1);
      // keep only the low byte of each 32-bit pixel
      const __m128i b0 = _mm_and_si128(a0, a_mask);
      const __m128i b1 = _mm_and_si128(a1, a_mask);
      // narrow 32b -> 16b -> 8b; values are <= 0xff so saturation is a no-op
      const __m128i c0 = _mm_packs_epi32(b0, b1);
      const __m128i d0 = _mm_packus_epi16(c0, c0);
      // store eight alpha bytes
      _mm_storel_epi64((__m128i*)&alpha[i], d0);
      // accumulate eight alpha 'and' in parallel
      all_alphas = _mm_and_si128(all_alphas, d0);
      src += 2;   // advance by the 32 bytes just consumed
    }
    // scalar tail: remaining pixels of the row, including the last one
    for (; i < width; ++i) {
      const uint32_t alpha_value = argb[4 * i];
      alpha[i] = (uint8_t)alpha_value;
      alpha_and &= alpha_value;
    }
    // move both planes to the next row
    argb += argb_stride;
    alpha += alpha_stride;
  }
  // Combine the eight alpha 'and' into a 8-bit mask.
  alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
  return (alpha_and == 0xff);
}
66 #endif // WEBP_USE_SSE2
68 //------------------------------------------------------------------------------
71 extern void WebPInitAlphaProcessingSSE2(void);
73 void WebPInitAlphaProcessingSSE2(void) {
74 #if defined(WEBP_USE_SSE2)
75 WebPExtractAlpha
= ExtractAlpha
;