workspace/ksmserver/fadeeffect.cpp

   1 /*
   2  * Copyright © 2008 Fredrik Höglund <fredrik@kde.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  17  * AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  */
  21
  22 #include <QThread>
  23 #include <QWidget>
  24 #include <QPixmap>
  25 #include <QTimer>
  26 #include <QX11Info>
  27 #include <QDebug>
  28
  29 #include <solid/device.h>
  30 #include <solid/processor.h>
  31
  32 #include <X11/Xlib.h>
  33 #include <X11/Xutil.h>
  34
  35 #include <string>
  36
  37 #if defined(__INTEL_COMPILER)
  38 #  define HAVE_MMX
  39 #  define HAVE_SSE2
  40 #elif defined(__GNUC__)
  41 #  if defined(__MMX__)
  42 #    define HAVE_MMX
  43 #  endif
  44 #  if defined(__SSE2__) && __GNUC__ > 3
  45 #    define HAVE_SSE2
  46 #  endif
  47 #endif
  48
  49 #ifdef HAVE_MMX
  50 #  include <mmintrin.h>
  51 #endif
  52
  53 #ifdef HAVE_SSE2
  54 #  include <emmintrin.h>
  55 #endif
  56
  57 #include "fadeeffect.h"
  58 #include "fadeeffect.moc"
  59
  60
  61 #ifndef HAVE_SSE2
  62 static inline void *_mm_malloc(size_t size, int)
  63 {
  64     return malloc(size);
  65 }
  66
  67 static inline void _mm_free(void *p)
  68 {
  69     free(p);
  70 }
  71 #endif
  72
  73
  74 static inline int multiply(int a, int b)
  75 {
  76     int res = a * b + 0x80;
  77     return (res + (res >> 8)) >> 8;
  78 }
  79
  80
  81 static inline void load(const quint32 src, int *r, int *g, int *b)
  82 {
  83     *r = (src >> 16) & 0xff;
  84     *g = (src >> 8) & 0xff;
  85     *b = src & 0xff;
  86 }
  87
  88
  89 static inline void load16(const quint16 src, int *r, int *g, int *b)
  90 {
  91     *r = ((src >> 8) & 0x00f8) | ((src >> 13) & 0x0007);
  92     *g = ((src >> 3) & 0x00fc) | ((src >>  9) & 0x0003);
  93     *b = ((src << 3) & 0x00f8) | ((src >>  2) & 0x0007);
  94 }
  95
  96
  97 static inline quint32 store(const int r, const int g, const int b)
  98 {
  99     return (r << 16) | (g << 8) | b | 0xff000000;
 100 }
 101
 102
 103 static inline quint16 store16(const int r, const int g, const int b)
 104 {
 105     return (((r << 8) | (b >> 3)) & 0xf81f) | ((g << 3) & 0x07e0);
 106 }
 107
 108
 109 static void scanline_blend(const quint32 *over, const quint8 alpha, const quint32 *under,
 110                            quint32 *result, uint length)
 111 {
 112     for (uint i = 0; i < length; ++i)
 113     {
 114         int sr, sg, sb, dr, dg, db;
 115
 116         load(over[i],  &sr, &sg, &sb);
 117         load(under[i], &dr, &dg, &db);
 118
 119         dr = multiply((sr - dr), alpha) + dr;
 120         dg = multiply((sg - dg), alpha) + dg;
 121         db = multiply((sb - db), alpha) + db;
 122
 123         result[i] = store(dr, dg, db);
 124     }
 125 }
 126
 127
 128 static void scanline_blend_16(const quint16 *over, const quint8 alpha, const quint16 *under,
 129                               quint16 *result, uint length)
 130 {
 131     for (uint i = 0; i < length; ++i)
 132     {
 133         int sr, sg, sb, dr, dg, db;
 134
 135         load16(over[i],  &sr, &sg, &sb);
 136         load16(under[i], &dr, &dg, &db);
 137
 138         dr = multiply((sr - dr), alpha) + dr;
 139         dg = multiply((sg - dg), alpha) + dg;
 140         db = multiply((sb - db), alpha) + db;
 141
 142         result[i] = store16(dr, dg, db);
 143     }
 144 }
 145
 146
 147
 148 // ----------------------------------------------------------------------------
 149
 150
 151
 152 #ifdef HAVE_MMX
 153 static inline __m64 multiply(const __m64 m1, const __m64 m2)
 154 {
 155     __m64 res = _mm_mullo_pi16(m1, m2);
 156     res = _mm_adds_pi16(res, _mm_set1_pi16 (0x0080));
 157     res = _mm_adds_pi16(res, _mm_srli_pi16 (res, 8));
 158     return _mm_srli_pi16(res, 8);
 159 }
 160
 161
 162 static inline __m64 add(const __m64 m1, const __m64 m2)
 163 {
 164     return _mm_adds_pi16(m1, m2);
 165 }
 166
 167
 168 static inline __m64 load(const quint32 pixel, const __m64 zero)
 169 {
 170     __m64 m = _mm_cvtsi32_si64(pixel);
 171     return _mm_unpacklo_pi8(m, zero);
 172 }
 173
 174 static inline quint32 store(const __m64 pixel, const __m64 zero)
 175 {
 176     __m64 packed = _mm_packs_pu16(pixel, zero);
 177     return _mm_cvtsi64_si32(packed);
 178 }
 179
 180
 181 static void scanline_blend_mmx(const quint32 *over, const quint8 a, const quint32 *under,
 182                                quint32 *result, uint length)
 183 {
 184     register const __m64 alpha    = _mm_set1_pi16(quint16 (a));
 185     register const __m64 negalpha = _mm_xor_si64(alpha, _mm_set1_pi16 (0x00ff));
 186     register const __m64 zero     = _mm_setzero_si64();
 187
 188     for (uint i = 0; i < length; ++i)
 189     {
 190         __m64 src = load(over[i],  zero);
 191         __m64 dst = load(under[i], zero);
 192
 193         src = multiply(src, alpha);
 194         dst = multiply(dst, negalpha);
 195         dst = add(src, dst);
 196
 197         result[i] = store(dst, zero);
 198     }
 199
 200     _mm_empty();
 201 }
 202 #endif // HAVE_MMX
 203
 204
 205 // ----------------------------------------------------------------------------
 206
 207
 208 #ifdef HAVE_SSE2
 209 static inline __m128i multiply(const __m128i m1, const __m128i m2)
 210 {
 211     __m128i res = _mm_mullo_epi16(m1, m2);
 212     res = _mm_adds_epi16(res, _mm_set1_epi16 (0x0080));
 213     res = _mm_adds_epi16(res, _mm_srli_epi16 (res, 8));
 214     return _mm_srli_epi16(res, 8);
 215 }
 216
 217
 218 static inline __m128i add(const __m128i m1, const __m128i m2)
 219 {
 220     return _mm_adds_epi16(m1, m2);
 221 }
 222
 223
 224 static inline __m128i lower(__m128i m)
 225 {
 226     return _mm_unpacklo_epi8(m, _mm_setzero_si128 ());
 227 }
 228
 229
 230 static inline __m128i upper(__m128i m)
 231 {
 232     return _mm_unpackhi_epi8(m, _mm_setzero_si128 ());
 233 }
 234
 235
 236 void scanline_blend_sse2(const __m128i *over, const quint8 a, const __m128i *under,
 237                          __m128i *result, uint length)
 238 {
 239     length = (length + 15) >> 4;
 240     register const __m128i alpha    = _mm_set1_epi16(__uint16_t (a));
 241     register const __m128i negalpha = _mm_xor_si128(alpha, _mm_set1_epi16 (0x00ff));
 242
 243     for (uint i = 0; i < length; i++)
 244     {
 245         __m128i squad = _mm_load_si128(over  + i);
 246         __m128i dquad = _mm_load_si128(under + i);
 247
 248         __m128i src1 = lower(squad);
 249         __m128i dst1 = lower(dquad);
 250         __m128i src2 = upper(squad);
 251         __m128i dst2 = upper(dquad);
 252
 253         squad = add(multiply(src1, alpha), multiply(dst1, negalpha));
 254         dquad = add(multiply(src2, alpha), multiply(dst2, negalpha));
 255
 256         dquad = _mm_packus_epi16(squad, dquad);
 257         _mm_store_si128(result + i, dquad);
 258     }
 259 }
 260 #endif // HAVE_SSE2
 261
 262
 263
 264 // ----------------------------------------------------------------------------
 265
 266
 267
 268 class BlendingThread : public QThread
 269 {
 270 public:
 271     BlendingThread(QObject *parent);
 272     ~BlendingThread();
 273
 274     void setImage(XImage *image);
 275     void setAlpha(int alpha) { m_alpha = alpha; }
 276
 277 private:
 278     void toGray16(quint8 *data);
 279     void toGray32(quint8 *data);
 280
 281     void blend16();
 282     void blend32();
 283     void blend32_mmx();
 284     void blend32_sse2();
 285
 286 protected:
 287     void run();
 288
 289 private:
 290     bool have_mmx;
 291     bool have_sse2;
 292     int m_alpha;
 293     XImage *m_image;
 294     quint8 *m_original;
 295     quint8 *m_final;
 296 };
 297
 298
 299 BlendingThread::BlendingThread(QObject *parent)
 300     : QThread(parent)
 301 {
 302     // Check if the CPU supports MMX and SSE2.
 303     // We only check the first CPU on an SMP system, and assume all CPU's support the same features.
 304     QList<Solid::Device> list = Solid::Device::listFromType(Solid::DeviceInterface::Processor, QString());
 305     if (list.size() > 0)
 306     {
 307         Solid::Processor::InstructionSets features = list[0].as<Solid::Processor>()->instructionSets();
 308         have_mmx  = features & Solid::Processor::IntelMmx;
 309         have_sse2 = features & Solid::Processor::IntelSse2;
 310     }
 311     else
 312     {
 313         // Can happen if e.g. there is no usable backend for Solid.  Err on the side of caution.
 314         // (c.f. bug:163112)
 315         have_mmx  = false;
 316         have_sse2 = false;
 317     }
 318
 319     m_final    = NULL;
 320     m_original = NULL;
 321 }
 322
 323
 324 BlendingThread::~BlendingThread()
 325 {
 326     _mm_free(m_final);
 327     _mm_free(m_original);
 328 }
 329
 330
 331 void BlendingThread::setImage(XImage *image)
 332 {
 333     m_image = image;
 334     int size = m_image->bytes_per_line * m_image->height;
 335
 336     // We need the data to be aligned on a 128 bit (16 byte) boundary for SSE2
 337     m_original = (quint8*) _mm_malloc(size, 16);
 338     m_final    = (quint8*) _mm_malloc(size, 16);
 339
 340     memcpy((void*)m_original, (const void*)m_image->data, size);
 341     memcpy((void*)m_final,    (const void*)m_image->data, size);
 342
 343     if (m_image->depth != 16)
 344         toGray32(m_final);
 345     else
 346         toGray16(m_final);
 347 }
 348
 349
 350 void BlendingThread::toGray16(quint8 *data)
 351 {
 352     for (int y = 0; y < m_image->height; y++)
 353     {
 354         quint16 *pixels = (quint16*)(data + (m_image->bytes_per_line * y));
 355         for (int x = 0; x < m_image->width; x++)
 356         {
 357             int red, green, blue;
 358             load16(pixels[x], &red, &green, &blue);
 359
 360             // Make sure the 3 least significant bits are 0, so the red, green and blue
 361             // channels really have the same value when packed in a 5/6/5 representation.
 362             int val = int(red * .299 + green * .587 + blue * .114) & 0xf8;
 363             pixels[x] = store16(val, val, val);
 364         }
 365     }
 366 }
 367
 368
 369 void BlendingThread::toGray32(quint8 *data)
 370 {
 371     for (int y = 0; y < m_image->height; y++)
 372     {
 373         quint32 *pixels = (quint32*)(data + (m_image->bytes_per_line * y));
 374         for (int x = 0; x < m_image->width; x++)
 375         {
 376             int red, green, blue;
 377             load(pixels[x], &red, &green, &blue);
 378
 379             int val = int(red * .299 + green * .587 + blue * .114);
 380             pixels[x] = store(val, val, val);
 381         }
 382     }
 383 }
 384
 385
 386 void BlendingThread::blend16()
 387 {
 388     for (int y = 0; y < m_image->height; y++)
 389     {
 390         uint start = m_image->bytes_per_line * y;
 391         quint16 *over   = (quint16*)(m_original + start);
 392         quint16 *under  = (quint16*)(m_final + start);
 393         quint16 *result = (quint16*)(m_image->data + start);
 394
 395         scanline_blend_16(over, m_alpha, under, result, m_image->width);
 396     }
 397 }
 398
 399
 400 void BlendingThread::blend32()
 401 {
 402     for (int y = 0; y < m_image->height; y++)
 403     {
 404         int start = m_image->bytes_per_line * y;
 405         quint32 *over   = (quint32*)(m_original + start);
 406         quint32 *under  = (quint32*)(m_final + start);
 407         quint32 *result = (quint32*)(m_image->data + start);
 408
 409         scanline_blend(over, m_alpha, under, result, m_image->width);
 410     }
 411 }
 412
 413
 414 void BlendingThread::blend32_mmx()
 415 {
 416 #ifdef HAVE_MMX
 417     for (int y = 0; y < m_image->height; y++)
 418     {
 419         int start = m_image->bytes_per_line * y;
 420         quint32 *over   = (quint32*)(m_original + start);
 421         quint32 *under  = (quint32*)(m_final + start);
 422         quint32 *result = (quint32*)(m_image->data + start);
 423
 424         scanline_blend_mmx(over, m_alpha, under, result, m_image->width);
 425     }
 426 #endif
 427 }
 428
 429
 430 void BlendingThread::blend32_sse2()
 431 {
 432 #ifdef HAVE_SSE2
 433     uint length = m_image->bytes_per_line * m_image->height;
 434
 435     __m128i *over   = (__m128i*)(m_original);
 436     __m128i *under  = (__m128i*)(m_final);
 437     __m128i *result = (__m128i*)(m_image->data);
 438
 439     scanline_blend_sse2(over, m_alpha, under, result, length);
 440 #endif
 441 }
 442
 443
 444 void BlendingThread::run()
 445 {
 446     if (m_image->depth != 16)
 447     {
 448 #ifdef HAVE_SSE2
 449         if (have_sse2)
 450             blend32_sse2();
 451         else
 452 #endif
 453 #ifdef HAVE_MMX
 454        if (have_mmx)
 455             blend32_mmx();
 456        else
 457 #endif
 458             blend32();
 459     }
 460     else
 461         blend16();
 462 }
 463
 464
 465
 466 // ----------------------------------------------------------------------------
 467
 468
 469
 470 FadeEffect::FadeEffect(QWidget *parent, QPixmap *pixmap)
 471     : LogoutEffect(parent, pixmap), blender(NULL)
 472 {
 473     Display *dpy = parent->x11Info().display();
 474
 475     image = XCreateImage(dpy, (Visual*)pixmap->x11Info().visual(), pixmap->depth(),
 476                          ZPixmap, 0, NULL, pixmap->width(), pixmap->height(), 32, 0);
 477
 478     // Allocate the image data on 16 byte boundary for SSE2
 479     image->data = (char*)_mm_malloc(image->bytes_per_line * image->height, 16);
 480
 481     gc = XCreateGC(dpy, pixmap->handle(), 0, NULL);
 482
 483     blender = new BlendingThread(this);
 484     currentY = 0;
 485 }
 486
 487
 488 FadeEffect::~FadeEffect()
 489 {
 490     blender->wait();
 491     XDestroyImage(image);
 492     XFreeGC(QX11Info::display(), gc);
 493 }
 494
 495
 496 void FadeEffect::start()
 497 {
 498     done = false;
 499     alpha = 255;
 500
 501     // Start by grabbing the screenshot
 502     grabImageSection();
 503 }
 504
 505
 506 void FadeEffect::grabImageSection()
 507 {
 508     const int sectionHeight = 64;
 509     int h = (currentY + sectionHeight > image->height) ? image->height - currentY : sectionHeight;
 510
 511     XGetSubImage(QX11Info::display(), QX11Info::appRootWindow(), 0, currentY, image->width, h,
 512                  AllPlanes, ZPixmap, image, 0, currentY);
 513
 514     // Continue until we have the whole image
 515     currentY += sectionHeight;
 516     if (currentY < image->height)
 517     {
 518         QTimer::singleShot(1, this, SLOT(grabImageSection()));
 519         return;
 520     }
 521
 522     // Let the owner know we're done.
 523     emit initialized();
 524
 525     // Start the fade effect
 526     blender->setImage(image);
 527     blender->setAlpha(alpha);
 528     blender->start();
 529     time.start();
 530
 531     QTimer::singleShot(10, this, SLOT(nextFrame()));
 532 }
 533
 534
 535 void FadeEffect::nextFrame()
 536 {
 537     const qreal runTime = 2000; // milliseconds
 538
 539     if (!blender->isFinished())
 540     {
 541         QTimer::singleShot(10, this, SLOT(nextFrame()));
 542         return;
 543     }
 544
 545     XPutImage(QX11Info::display(), pixmap->handle(), gc, image, 0, 0, 0, 0, image->width, image->height);
 546     parent->update();
 547
 548     alpha = qRound(qMax(255. - (255. * (qreal(time.elapsed() / runTime))), 0.0));
 549
 550     if (!done)
 551     {
 552         blender->setAlpha(alpha);
 553         blender->start();
 554
 555         // Make sure we don't send frames faster than the X server can process them
 556         XSync(QX11Info::display(), False);
 557         QTimer::singleShot(1, this, SLOT(nextFrame()));
 558     }
 559
 560     if (alpha == 0)
 561         done = true;
 562 }
 563