not quite so much needs to be delayed to the init() function
[personal-kdebase.git] / workspace / ksmserver / fadeeffect.cpp
blob49cad40d5c6f8a4137c08b2ebf6ef4e4645243ee
1 /*
2 * Copyright © 2008 Fredrik Höglund <fredrik@kde.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 #include <QThread>
23 #include <QWidget>
24 #include <QPixmap>
25 #include <QTimer>
26 #include <QX11Info>
27 #include <QDebug>
29 #include <solid/device.h>
30 #include <solid/processor.h>
32 #include <X11/Xlib.h>
33 #include <X11/Xutil.h>
35 #include <string>
37 #if defined(__INTEL_COMPILER)
38 # define HAVE_MMX
39 # define HAVE_SSE2
40 #elif defined(__GNUC__)
41 # if defined(__MMX__)
42 # define HAVE_MMX
43 # endif
44 # if defined(__SSE2__) && __GNUC__ > 3
45 # define HAVE_SSE2
46 # endif
47 #endif
49 #ifdef HAVE_MMX
50 # include <mmintrin.h>
51 #endif
53 #ifdef HAVE_SSE2
54 # include <emmintrin.h>
55 #endif
57 #include "fadeeffect.h"
58 #include "fadeeffect.moc"
#ifndef HAVE_SSE2
// Fallbacks used when the SSE2 intrinsics header (which normally provides
// _mm_malloc/_mm_free) is not included. The alignment argument is ignored.
static inline void *_mm_malloc(size_t size, int)
{
    return malloc(size);
}

// Counterpart of the fallback _mm_malloc() above.
static inline void _mm_free(void *p)
{
    free(p);
}
#endif
// Multiplies two values that are scaled to the range 0..255 and returns the
// product rescaled to the same range, i.e. a rounded a * b / 255 computed
// with shifts instead of a division.
static inline int multiply(int a, int b)
{
    const int res = a * b + 0x80;
    return (res + (res >> 8)) >> 8;
}
81 static inline void load(const quint32 src, int *r, int *g, int *b)
83 *r = (src >> 16) & 0xff;
84 *g = (src >> 8) & 0xff;
85 *b = src & 0xff;
89 static inline void load16(const quint16 src, int *r, int *g, int *b)
91 *r = ((src >> 8) & 0x00f8) | ((src >> 13) & 0x0007);
92 *g = ((src >> 3) & 0x00fc) | ((src >> 9) & 0x0003);
93 *b = ((src << 3) & 0x00f8) | ((src >> 2) & 0x0007);
97 static inline quint32 store(const int r, const int g, const int b)
99 return (r << 16) | (g << 8) | b | 0xff000000;
103 static inline quint16 store16(const int r, const int g, const int b)
105 return (((r << 8) | (b >> 3)) & 0xf81f) | ((g << 3) & 0x07e0);
109 static void scanline_blend(const quint32 *over, const quint8 alpha, const quint32 *under,
110 quint32 *result, uint length)
112 for (uint i = 0; i < length; ++i)
114 int sr, sg, sb, dr, dg, db;
116 load(over[i], &sr, &sg, &sb);
117 load(under[i], &dr, &dg, &db);
119 dr = multiply((sr - dr), alpha) + dr;
120 dg = multiply((sg - dg), alpha) + dg;
121 db = multiply((sb - db), alpha) + db;
123 result[i] = store(dr, dg, db);
128 static void scanline_blend_16(const quint16 *over, const quint8 alpha, const quint16 *under,
129 quint16 *result, uint length)
131 for (uint i = 0; i < length; ++i)
133 int sr, sg, sb, dr, dg, db;
135 load16(over[i], &sr, &sg, &sb);
136 load16(under[i], &dr, &dg, &db);
138 dr = multiply((sr - dr), alpha) + dr;
139 dg = multiply((sg - dg), alpha) + dg;
140 db = multiply((sb - db), alpha) + db;
142 result[i] = store16(dr, dg, db);
148 // ----------------------------------------------------------------------------
152 #ifdef HAVE_MMX
// Multiplies four 16-bit lanes holding values in the range 0..255 and
// rescales each product back to 0..255 (rounded a * b / 255).
//
// The intermediate sums reach up to 65407, which does not fit in a signed
// 16-bit lane, so the additions must use *unsigned* saturation. A signed
// saturating add would clamp intermediates such as 255 * 128 + 0x80 to
// 0x7fff and produce a result that is one too small.
static inline __m64 multiply(const __m64 m1, const __m64 m2)
{
    __m64 res = _mm_mullo_pi16(m1, m2);
    res = _mm_adds_pu16(res, _mm_set1_pi16 (0x0080));
    res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
    return _mm_srli_pi16(res, 8);
}
// Adds four 16-bit lanes with signed saturation.
static inline __m64 add(const __m64 m1, const __m64 m2)
{
    return _mm_adds_pi16(m1, m2);
}
168 static inline __m64 load(const quint32 pixel, const __m64 zero)
170 __m64 m = _mm_cvtsi32_si64(pixel);
171 return _mm_unpacklo_pi8(m, zero);
174 static inline quint32 store(const __m64 pixel, const __m64 zero)
176 __m64 packed = _mm_packs_pu16(pixel, zero);
177 return _mm_cvtsi64_si32(packed);
181 static void scanline_blend_mmx(const quint32 *over, const quint8 a, const quint32 *under,
182 quint32 *result, uint length)
184 register const __m64 alpha = _mm_set1_pi16(quint16 (a));
185 register const __m64 negalpha = _mm_xor_si64(alpha, _mm_set1_pi16 (0x00ff));
186 register const __m64 zero = _mm_setzero_si64();
188 for (uint i = 0; i < length; ++i)
190 __m64 src = load(over[i], zero);
191 __m64 dst = load(under[i], zero);
193 src = multiply(src, alpha);
194 dst = multiply(dst, negalpha);
195 dst = add(src, dst);
197 result[i] = store(dst, zero);
200 _mm_empty();
202 #endif // HAVE_MMX
205 // ----------------------------------------------------------------------------
208 #ifdef HAVE_SSE2
// Multiplies eight 16-bit lanes holding values in the range 0..255 and
// rescales each product back to 0..255 (rounded a * b / 255).
//
// The intermediate sums reach up to 65407, which does not fit in a signed
// 16-bit lane, so the additions must use *unsigned* saturation. A signed
// saturating add would clamp intermediates such as 255 * 128 + 0x80 to
// 0x7fff and produce a result that is one too small.
static inline __m128i multiply(const __m128i m1, const __m128i m2)
{
    __m128i res = _mm_mullo_epi16(m1, m2);
    res = _mm_adds_epu16(res, _mm_set1_epi16 (0x0080));
    res = _mm_adds_epu16(res, _mm_srli_epi16 (res, 8));
    return _mm_srli_epi16(res, 8);
}
// Adds eight 16-bit lanes with signed saturation.
static inline __m128i add(const __m128i m1, const __m128i m2)
{
    return _mm_adds_epi16(m1, m2);
}
// Zero-extends the low eight bytes of `m` into eight 16-bit lanes.
static inline __m128i lower(__m128i m)
{
    return _mm_unpacklo_epi8(m, _mm_setzero_si128 ());
}
// Zero-extends the high eight bytes of `m` into eight 16-bit lanes.
static inline __m128i upper(__m128i m)
{
    return _mm_unpackhi_epi8(m, _mm_setzero_si128 ());
}
236 void scanline_blend_sse2(const __m128i *over, const quint8 a, const __m128i *under,
237 __m128i *result, uint length)
239 length = (length + 15) >> 4;
240 register const __m128i alpha = _mm_set1_epi16(__uint16_t (a));
241 register const __m128i negalpha = _mm_xor_si128(alpha, _mm_set1_epi16 (0x00ff));
243 for (uint i = 0; i < length; i++)
245 __m128i squad = _mm_load_si128(over + i);
246 __m128i dquad = _mm_load_si128(under + i);
248 __m128i src1 = lower(squad);
249 __m128i dst1 = lower(dquad);
250 __m128i src2 = upper(squad);
251 __m128i dst2 = upper(dquad);
253 squad = add(multiply(src1, alpha), multiply(dst1, negalpha));
254 dquad = add(multiply(src2, alpha), multiply(dst2, negalpha));
256 dquad = _mm_packus_epi16(squad, dquad);
257 _mm_store_si128(result + i, dquad);
260 #endif // HAVE_SSE2
264 // ----------------------------------------------------------------------------
268 class BlendingThread : public QThread
270 public:
271 BlendingThread(QObject *parent);
272 ~BlendingThread();
274 void setImage(XImage *image);
275 void setAlpha(int alpha) { m_alpha = alpha; }
277 private:
278 void toGray16(quint8 *data);
279 void toGray32(quint8 *data);
281 void blend16();
282 void blend32();
283 void blend32_mmx();
284 void blend32_sse2();
286 protected:
287 void run();
289 private:
290 bool have_mmx;
291 bool have_sse2;
292 int m_alpha;
293 XImage *m_image;
294 quint8 *m_original;
295 quint8 *m_final;
299 BlendingThread::BlendingThread(QObject *parent)
300 : QThread(parent)
302 // Check if the CPU supports MMX and SSE2.
303 // We only check the first CPU on an SMP system, and assume all CPU's support the same features.
304 QList<Solid::Device> list = Solid::Device::listFromType(Solid::DeviceInterface::Processor, QString());
305 if (list.size() > 0)
307 Solid::Processor::InstructionSets features = list[0].as<Solid::Processor>()->instructionSets();
308 have_mmx = features & Solid::Processor::IntelMmx;
309 have_sse2 = features & Solid::Processor::IntelSse2;
311 else
313 // Can happen if e.g. there is no usable backend for Solid. Err on the side of caution.
314 // (c.f. bug:163112)
315 have_mmx = false;
316 have_sse2 = false;
319 m_final = NULL;
320 m_original = NULL;
324 BlendingThread::~BlendingThread()
326 _mm_free(m_final);
327 _mm_free(m_original);
331 void BlendingThread::setImage(XImage *image)
333 m_image = image;
334 int size = m_image->bytes_per_line * m_image->height;
336 // We need the data to be aligned on a 128 bit (16 byte) boundary for SSE2
337 m_original = (quint8*) _mm_malloc(size, 16);
338 m_final = (quint8*) _mm_malloc(size, 16);
340 memcpy((void*)m_original, (const void*)m_image->data, size);
341 memcpy((void*)m_final, (const void*)m_image->data, size);
343 if (m_image->depth != 16)
344 toGray32(m_final);
345 else
346 toGray16(m_final);
350 void BlendingThread::toGray16(quint8 *data)
352 for (int y = 0; y < m_image->height; y++)
354 quint16 *pixels = (quint16*)(data + (m_image->bytes_per_line * y));
355 for (int x = 0; x < m_image->width; x++)
357 int red, green, blue;
358 load16(pixels[x], &red, &green, &blue);
360 // Make sure the 3 least significant bits are 0, so the red, green and blue
361 // channels really have the same value when packed in a 5/6/5 representation.
362 int val = int(red * .299 + green * .587 + blue * .114) & 0xf8;
363 pixels[x] = store16(val, val, val);
369 void BlendingThread::toGray32(quint8 *data)
371 for (int y = 0; y < m_image->height; y++)
373 quint32 *pixels = (quint32*)(data + (m_image->bytes_per_line * y));
374 for (int x = 0; x < m_image->width; x++)
376 int red, green, blue;
377 load(pixels[x], &red, &green, &blue);
379 int val = int(red * .299 + green * .587 + blue * .114);
380 pixels[x] = store(val, val, val);
386 void BlendingThread::blend16()
388 for (int y = 0; y < m_image->height; y++)
390 uint start = m_image->bytes_per_line * y;
391 quint16 *over = (quint16*)(m_original + start);
392 quint16 *under = (quint16*)(m_final + start);
393 quint16 *result = (quint16*)(m_image->data + start);
395 scanline_blend_16(over, m_alpha, under, result, m_image->width);
400 void BlendingThread::blend32()
402 for (int y = 0; y < m_image->height; y++)
404 int start = m_image->bytes_per_line * y;
405 quint32 *over = (quint32*)(m_original + start);
406 quint32 *under = (quint32*)(m_final + start);
407 quint32 *result = (quint32*)(m_image->data + start);
409 scanline_blend(over, m_alpha, under, result, m_image->width);
414 void BlendingThread::blend32_mmx()
416 #ifdef HAVE_MMX
417 for (int y = 0; y < m_image->height; y++)
419 int start = m_image->bytes_per_line * y;
420 quint32 *over = (quint32*)(m_original + start);
421 quint32 *under = (quint32*)(m_final + start);
422 quint32 *result = (quint32*)(m_image->data + start);
424 scanline_blend_mmx(over, m_alpha, under, result, m_image->width);
426 #endif
430 void BlendingThread::blend32_sse2()
432 #ifdef HAVE_SSE2
433 uint length = m_image->bytes_per_line * m_image->height;
435 __m128i *over = (__m128i*)(m_original);
436 __m128i *under = (__m128i*)(m_final);
437 __m128i *result = (__m128i*)(m_image->data);
439 scanline_blend_sse2(over, m_alpha, under, result, length);
440 #endif
444 void BlendingThread::run()
446 if (m_image->depth != 16)
448 #ifdef HAVE_SSE2
449 if (have_sse2)
450 blend32_sse2();
451 else
452 #endif
453 #ifdef HAVE_MMX
454 if (have_mmx)
455 blend32_mmx();
456 else
457 #endif
458 blend32();
460 else
461 blend16();
466 // ----------------------------------------------------------------------------
470 FadeEffect::FadeEffect(QWidget *parent, QPixmap *pixmap)
471 : LogoutEffect(parent, pixmap), blender(NULL)
473 Display *dpy = parent->x11Info().display();
475 image = XCreateImage(dpy, (Visual*)pixmap->x11Info().visual(), pixmap->depth(),
476 ZPixmap, 0, NULL, pixmap->width(), pixmap->height(), 32, 0);
478 // Allocate the image data on 16 byte boundary for SSE2
479 image->data = (char*)_mm_malloc(image->bytes_per_line * image->height, 16);
481 gc = XCreateGC(dpy, pixmap->handle(), 0, NULL);
483 blender = new BlendingThread(this);
484 currentY = 0;
488 FadeEffect::~FadeEffect()
490 blender->wait();
491 XDestroyImage(image);
492 XFreeGC(QX11Info::display(), gc);
496 void FadeEffect::start()
498 done = false;
499 alpha = 255;
501 // Start by grabbing the screenshot
502 grabImageSection();
506 void FadeEffect::grabImageSection()
508 const int sectionHeight = 64;
509 int h = (currentY + sectionHeight > image->height) ? image->height - currentY : sectionHeight;
511 XGetSubImage(QX11Info::display(), QX11Info::appRootWindow(), 0, currentY, image->width, h,
512 AllPlanes, ZPixmap, image, 0, currentY);
514 // Continue until we have the whole image
515 currentY += sectionHeight;
516 if (currentY < image->height)
518 QTimer::singleShot(1, this, SLOT(grabImageSection()));
519 return;
522 // Let the owner know we're done.
523 emit initialized();
525 // Start the fade effect
526 blender->setImage(image);
527 blender->setAlpha(alpha);
528 blender->start();
529 time.start();
531 QTimer::singleShot(10, this, SLOT(nextFrame()));
535 void FadeEffect::nextFrame()
537 const qreal runTime = 2000; // milliseconds
539 if (!blender->isFinished())
541 QTimer::singleShot(10, this, SLOT(nextFrame()));
542 return;
545 XPutImage(QX11Info::display(), pixmap->handle(), gc, image, 0, 0, 0, 0, image->width, image->height);
546 parent->update();
548 alpha = qRound(qMax(255. - (255. * (qreal(time.elapsed() / runTime))), 0.0));
550 if (!done)
552 blender->setAlpha(alpha);
553 blender->start();
555 // Make sure we don't send frames faster than the X server can process them
556 XSync(QX11Info::display(), False);
557 QTimer::singleShot(1, this, SLOT(nextFrame()));
560 if (alpha == 0)
561 done = true;