/*
 *   Copyright © 2008 Fredrik Höglund <fredrik@kde.org>
 *
 *   Permission is hereby granted, free of charge, to any person obtaining a copy
 *   of this software and associated documentation files (the "Software"), to deal
 *   in the Software without restriction, including without limitation the rights
 *   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 *   copies of the Software, and to permit persons to whom the Software is
 *   furnished to do so, subject to the following conditions:
 *
 *   The above copyright notice and this permission notice shall be included in
 *   all copies or substantial portions of the Software.
 *
 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 *   AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 *   AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 *   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
29 #include <solid/device.h>
30 #include <solid/processor.h>
33 #include <X11/Xutil.h>
37 #if defined(__INTEL_COMPILER)
40 #elif defined(__GNUC__)
44 # if defined(__SSE2__) && __GNUC__ > 3
50 # include <mmintrin.h>
54 # include <emmintrin.h>
57 #include "fadeeffect.h"
58 #include "fadeeffect.moc"
// File-local _mm_malloc taking a byte count and an unnamed int argument.
// NOTE(review): the function body is not visible in this excerpt; callers in
// this file pass 16 as the second argument and expect 16-byte aligned memory
// for SSE2 - confirm what this definition actually guarantees.
62 static inline void *_mm_malloc(size_t size
, int)
// File-local _mm_free taking the pointer to release.
// NOTE(review): the function body is not visible in this excerpt; presumably
// it is the counterpart of the file-local _mm_malloc - confirm.
67 static inline void _mm_free(void *p
)
// Multiplies two 8 bit fixed point values: returns a * b / 255, rounded to
// the nearest integer, using shifts instead of a division.
static inline int multiply(int a, int b)
{
    // Adding 0x80 before the shifts provides the rounding.
    const int res = a * b + 0x80;
    return (res + (res >> 8)) >> 8;
}
// Splits a 32 bit pixel into its 8 bit colour channels.
//   src - the packed pixel; red in bits 16-23, green in bits 8-15, blue in bits 0-7
//   r, g, b - receive the channel values (0-255); the top byte of src is ignored
static inline void load(const quint32 src, int *r, int *g, int *b)
{
    *r = (src >> 16) & 0xff;
    *g = (src >> 8) & 0xff;
    // The blue channel occupies the low byte (store() packs it back there).
    *b = src & 0xff;
}
// Expands a 16 bit 5/6/5 pixel into three 8 bit colour channels.
// The most significant bits of each field are replicated into the low bits
// of the result, so a fully set field expands to 0xff.
//   src - the packed 5/6/5 pixel
//   r, g, b - receive the expanded channel values (0-255)
static inline void load16(const quint16 src, int *r, int *g, int *b)
{
    *r = ((src >> 8) & 0x00f8) | ((src >> 13) & 0x0007);
    *g = ((src >> 3) & 0x00fc) | ((src >>  9) & 0x0003);
    *b = ((src << 3) & 0x00f8) | ((src >>  2) & 0x0007);
}
// Packs three 8 bit colour channels into a 32 bit pixel.
// The top byte of the result is always set to 0xff.
static inline quint32 store(const int r, const int g, const int b)
{
    return (r << 16) | (g << 8) | b | 0xff000000;
}
// Packs three 8 bit colour channels into a 16 bit 5/6/5 pixel.
// The low bits of each channel that don't fit in the field are discarded.
static inline quint16 store16(const int r, const int g, const int b)
{
    return (((r << 8) | (b >> 3)) & 0xf81f) | ((g << 3) & 0x07e0);
}
109 static void scanline_blend(const quint32
*over
, const quint8 alpha
, const quint32
*under
,
110 quint32
*result
, uint length
)
112 for (uint i
= 0; i
< length
; ++i
)
114 int sr
, sg
, sb
, dr
, dg
, db
;
116 load(over
[i
], &sr
, &sg
, &sb
);
117 load(under
[i
], &dr
, &dg
, &db
);
119 dr
= multiply((sr
- dr
), alpha
) + dr
;
120 dg
= multiply((sg
- dg
), alpha
) + dg
;
121 db
= multiply((sb
- db
), alpha
) + db
;
123 result
[i
] = store(dr
, dg
, db
);
128 static void scanline_blend_16(const quint16
*over
, const quint8 alpha
, const quint16
*under
,
129 quint16
*result
, uint length
)
131 for (uint i
= 0; i
< length
; ++i
)
133 int sr
, sg
, sb
, dr
, dg
, db
;
135 load16(over
[i
], &sr
, &sg
, &sb
);
136 load16(under
[i
], &dr
, &dg
, &db
);
138 dr
= multiply((sr
- dr
), alpha
) + dr
;
139 dg
= multiply((sg
- dg
), alpha
) + dg
;
140 db
= multiply((sb
- db
), alpha
) + db
;
142 result
[i
] = store16(dr
, dg
, db
);
148 // ----------------------------------------------------------------------------
// Multiplies four 16 bit lanes holding 8 bit values: each lane of the result
// is m1 * m2 / 255, rounded to the nearest integer.
static inline __m64 multiply(const __m64 m1, const __m64 m2)
{
    __m64 res = _mm_mullo_pi16(m1, m2);

    // The intermediate values are unsigned and can be as large as
    // 255 * 255 + 0x80 + 0xfe, which still fits in 16 bits. Use wrapping adds;
    // signed saturating adds would clamp values just below 0x8000 to 0x7fff
    // and make the result one too small (e.g. 224 * 146).
    res = _mm_add_pi16(res, _mm_set1_pi16 (0x0080));
    res = _mm_add_pi16(res, _mm_srli_pi16 (res, 8));
    return _mm_srli_pi16(res, 8);
}
// Adds the four 16 bit lanes of m1 and m2 with signed saturation.
static inline __m64 add(const __m64 m1, const __m64 m2)
{
    return _mm_adds_pi16(m1, m2);
}
// Loads a 32 bit pixel into an MMX register, widening each of its four
// bytes to a 16 bit lane by interleaving them with the bytes of 'zero'.
//   pixel - the packed 32 bit pixel
//   zero  - a register the caller has set to zero
static inline __m64 load(const quint32 pixel, const __m64 zero)
{
    const __m64 m = _mm_cvtsi32_si64(pixel);
    return _mm_unpacklo_pi8(m, zero);
}
// Packs the four 16 bit lanes of 'pixel' back into a 32 bit pixel, clamping
// each lane to the 0-255 range.
//   pixel - the pixel with one channel per 16 bit lane
//   zero  - a register the caller has set to zero
static inline quint32 store(const __m64 pixel, const __m64 zero)
{
    const __m64 packed = _mm_packs_pu16(pixel, zero);
    return _mm_cvtsi64_si32(packed);
}
181 static void scanline_blend_mmx(const quint32
*over
, const quint8 a
, const quint32
*under
,
182 quint32
*result
, uint length
)
184 register const __m64 alpha
= _mm_set1_pi16(quint16 (a
));
185 register const __m64 negalpha
= _mm_xor_si64(alpha
, _mm_set1_pi16 (0x00ff));
186 register const __m64 zero
= _mm_setzero_si64();
188 for (uint i
= 0; i
< length
; ++i
)
190 __m64 src
= load(over
[i
], zero
);
191 __m64 dst
= load(under
[i
], zero
);
193 src
= multiply(src
, alpha
);
194 dst
= multiply(dst
, negalpha
);
197 result
[i
] = store(dst
, zero
);
205 // ----------------------------------------------------------------------------
// Multiplies eight 16 bit lanes holding 8 bit values: each lane of the result
// is m1 * m2 / 255, rounded to the nearest integer.
static inline __m128i multiply(const __m128i m1, const __m128i m2)
{
    __m128i res = _mm_mullo_epi16(m1, m2);

    // The intermediate values are unsigned and can be as large as
    // 255 * 255 + 0x80 + 0xfe, which still fits in 16 bits. Use wrapping adds;
    // signed saturating adds would clamp values just below 0x8000 to 0x7fff
    // and make the result one too small (e.g. 224 * 146).
    res = _mm_add_epi16(res, _mm_set1_epi16 (0x0080));
    res = _mm_add_epi16(res, _mm_srli_epi16 (res, 8));
    return _mm_srli_epi16(res, 8);
}
// Adds the eight 16 bit lanes of m1 and m2 with signed saturation.
static inline __m128i add(const __m128i m1, const __m128i m2)
{
    return _mm_adds_epi16(m1, m2);
}
// Widens the eight low bytes of m to 16 bit lanes by interleaving them
// with zero bytes.
static inline __m128i lower(__m128i m)
{
    return _mm_unpacklo_epi8(m, _mm_setzero_si128 ());
}
// Widens the eight high bytes of m to 16 bit lanes by interleaving them
// with zero bytes.
static inline __m128i upper(__m128i m)
{
    return _mm_unpackhi_epi8(m, _mm_setzero_si128 ());
}
236 void scanline_blend_sse2(const __m128i
*over
, const quint8 a
, const __m128i
*under
,
237 __m128i
*result
, uint length
)
239 length
= (length
+ 15) >> 4;
240 register const __m128i alpha
= _mm_set1_epi16(__uint16_t (a
));
241 register const __m128i negalpha
= _mm_xor_si128(alpha
, _mm_set1_epi16 (0x00ff));
243 for (uint i
= 0; i
< length
; i
++)
245 __m128i squad
= _mm_load_si128(over
+ i
);
246 __m128i dquad
= _mm_load_si128(under
+ i
);
248 __m128i src1
= lower(squad
);
249 __m128i dst1
= lower(dquad
);
250 __m128i src2
= upper(squad
);
251 __m128i dst2
= upper(dquad
);
253 squad
= add(multiply(src1
, alpha
), multiply(dst1
, negalpha
));
254 dquad
= add(multiply(src2
, alpha
), multiply(dst2
, negalpha
));
256 dquad
= _mm_packus_epi16(squad
, dquad
);
257 _mm_store_si128(result
+ i
, dquad
);
264 // ----------------------------------------------------------------------------
268 class BlendingThread
: public QThread
271 BlendingThread(QObject
*parent
);
274 void setImage(XImage
*image
);
275 void setAlpha(int alpha
) { m_alpha
= alpha
; }
278 void toGray16(quint8
*data
);
279 void toGray32(quint8
*data
);
299 BlendingThread::BlendingThread(QObject
*parent
)
302 // Check if the CPU supports MMX and SSE2.
303 // We only check the first CPU on an SMP system, and assume all CPU's support the same features.
304 QList
<Solid::Device
> list
= Solid::Device::listFromType(Solid::DeviceInterface::Processor
, QString());
307 Solid::Processor::InstructionSets features
= list
[0].as
<Solid::Processor
>()->instructionSets();
308 have_mmx
= features
& Solid::Processor::IntelMmx
;
309 have_sse2
= features
& Solid::Processor::IntelSse2
;
313 // Can happen if e.g. there is no usable backend for Solid. Err on the side of caution.
324 BlendingThread::~BlendingThread()
327 _mm_free(m_original
);
331 void BlendingThread::setImage(XImage
*image
)
334 int size
= m_image
->bytes_per_line
* m_image
->height
;
336 // We need the data to be aligned on a 128 bit (16 byte) boundary for SSE2
337 m_original
= (quint8
*) _mm_malloc(size
, 16);
338 m_final
= (quint8
*) _mm_malloc(size
, 16);
340 memcpy((void*)m_original
, (const void*)m_image
->data
, size
);
341 memcpy((void*)m_final
, (const void*)m_image
->data
, size
);
343 if (m_image
->depth
!= 16)
350 void BlendingThread::toGray16(quint8
*data
)
352 for (int y
= 0; y
< m_image
->height
; y
++)
354 quint16
*pixels
= (quint16
*)(data
+ (m_image
->bytes_per_line
* y
));
355 for (int x
= 0; x
< m_image
->width
; x
++)
357 int red
, green
, blue
;
358 load16(pixels
[x
], &red
, &green
, &blue
);
360 // Make sure the 3 least significant bits are 0, so the red, green and blue
361 // channels really have the same value when packed in a 5/6/5 representation.
362 int val
= int(red
* .299 + green
* .587 + blue
* .114) & 0xf8;
363 pixels
[x
] = store16(val
, val
, val
);
369 void BlendingThread::toGray32(quint8
*data
)
371 for (int y
= 0; y
< m_image
->height
; y
++)
373 quint32
*pixels
= (quint32
*)(data
+ (m_image
->bytes_per_line
* y
));
374 for (int x
= 0; x
< m_image
->width
; x
++)
376 int red
, green
, blue
;
377 load(pixels
[x
], &red
, &green
, &blue
);
379 int val
= int(red
* .299 + green
* .587 + blue
* .114);
380 pixels
[x
] = store(val
, val
, val
);
386 void BlendingThread::blend16()
388 for (int y
= 0; y
< m_image
->height
; y
++)
390 uint start
= m_image
->bytes_per_line
* y
;
391 quint16
*over
= (quint16
*)(m_original
+ start
);
392 quint16
*under
= (quint16
*)(m_final
+ start
);
393 quint16
*result
= (quint16
*)(m_image
->data
+ start
);
395 scanline_blend_16(over
, m_alpha
, under
, result
, m_image
->width
);
400 void BlendingThread::blend32()
402 for (int y
= 0; y
< m_image
->height
; y
++)
404 int start
= m_image
->bytes_per_line
* y
;
405 quint32
*over
= (quint32
*)(m_original
+ start
);
406 quint32
*under
= (quint32
*)(m_final
+ start
);
407 quint32
*result
= (quint32
*)(m_image
->data
+ start
);
409 scanline_blend(over
, m_alpha
, under
, result
, m_image
->width
);
414 void BlendingThread::blend32_mmx()
417 for (int y
= 0; y
< m_image
->height
; y
++)
419 int start
= m_image
->bytes_per_line
* y
;
420 quint32
*over
= (quint32
*)(m_original
+ start
);
421 quint32
*under
= (quint32
*)(m_final
+ start
);
422 quint32
*result
= (quint32
*)(m_image
->data
+ start
);
424 scanline_blend_mmx(over
, m_alpha
, under
, result
, m_image
->width
);
430 void BlendingThread::blend32_sse2()
433 uint length
= m_image
->bytes_per_line
* m_image
->height
;
435 __m128i
*over
= (__m128i
*)(m_original
);
436 __m128i
*under
= (__m128i
*)(m_final
);
437 __m128i
*result
= (__m128i
*)(m_image
->data
);
439 scanline_blend_sse2(over
, m_alpha
, under
, result
, length
);
// Thread entry point. Branches on whether the image depth is 16 bits.
// NOTE(review): the statements of both branches are not visible in this
// excerpt; presumably they select one of the blend*() functions - confirm.
444 void BlendingThread::run()
446 if (m_image
->depth
!= 16)
466 // ----------------------------------------------------------------------------
470 FadeEffect::FadeEffect(QWidget
*parent
, QPixmap
*pixmap
)
471 : LogoutEffect(parent
, pixmap
), blender(NULL
)
473 Display
*dpy
= parent
->x11Info().display();
475 image
= XCreateImage(dpy
, (Visual
*)pixmap
->x11Info().visual(), pixmap
->depth(),
476 ZPixmap
, 0, NULL
, pixmap
->width(), pixmap
->height(), 32, 0);
478 // Allocate the image data on 16 byte boundary for SSE2
479 image
->data
= (char*)_mm_malloc(image
->bytes_per_line
* image
->height
, 16);
481 gc
= XCreateGC(dpy
, pixmap
->handle(), 0, NULL
);
483 blender
= new BlendingThread(this);
488 FadeEffect::~FadeEffect()
491 XDestroyImage(image
);
492 XFreeGC(QX11Info::display(), gc
);
// Starts the effect.
// NOTE(review): the statements of this function are not visible in this
// excerpt; per the comment below, it begins by grabbing the screenshot.
496 void FadeEffect::start()
501 // Start by grabbing the screenshot
// Grabs the next section of the root window into the image, at most
// sectionHeight (64) rows at a time starting at currentY, and reschedules
// itself with a single shot timer while currentY is less than the image height.
// Afterwards the image and alpha value are handed to the blender, and
// nextFrame() is scheduled.
// NOTE(review): some statements (e.g. what follows the rescheduling, and how
// the owner is notified) are not visible in this excerpt.
506 void FadeEffect::grabImageSection()
508 const int sectionHeight
= 64;
// Clamp the height of the last section to what remains of the image
509 int h
= (currentY
+ sectionHeight
> image
->height
) ? image
->height
- currentY
: sectionHeight
;
511 XGetSubImage(QX11Info::display(), QX11Info::appRootWindow(), 0, currentY
, image
->width
, h
,
512 AllPlanes
, ZPixmap
, image
, 0, currentY
);
514 // Continue until we have the whole image
515 currentY
+= sectionHeight
;
516 if (currentY
< image
->height
)
518 QTimer::singleShot(1, this, SLOT(grabImageSection()));
522 // Let the owner know we're done.
525 // Start the fade effect
526 blender
->setImage(image
);
527 blender
->setAlpha(alpha
);
531 QTimer::singleShot(10, this, SLOT(nextFrame()));
// Draws the blended image into the pixmap and prepares the next frame.
// While the blender hasn't finished, the function reschedules itself in 10 ms.
// The new alpha value decreases linearly from 255 to 0 over runTime
// milliseconds of elapsed time, and is clamped at 0.
// NOTE(review): some statements (e.g. what follows the rescheduling, and the
// condition that ends the animation) are not visible in this excerpt.
535 void FadeEffect::nextFrame()
537 const qreal runTime
= 2000; // milliseconds
539 if (!blender
->isFinished())
541 QTimer::singleShot(10, this, SLOT(nextFrame()));
// Copy the whole blended image into the pixmap
545 XPutImage(QX11Info::display(), pixmap
->handle(), gc
, image
, 0, 0, 0, 0, image
->width
, image
->height
);
548 alpha
= qRound(qMax(255. - (255. * (qreal(time
.elapsed() / runTime
))), 0.0));
552 blender
->setAlpha(alpha
);
555 // Make sure we don't send frames faster than the X server can process them
556 XSync(QX11Info::display(), False
);
557 QTimer::singleShot(1, this, SLOT(nextFrame()));