media/base/yuv_convert.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 // This webpage shows layout of YV12 and other YUV formats
   6 // http://www.fourcc.org/yuv.php
   7 // The actual conversion is best described here
   8 // http://en.wikipedia.org/wiki/YUV
   9 // An article on optimizing YUV conversion using tables instead of multiplies
  10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
  11 //
// YV12 is a full plane of Y plus half-height, half-width chroma planes.
// YV16 is a full plane of Y plus full-height, half-width chroma planes.
  14 //
  15 // ARGB pixel format is output, which on little endian is stored as BGRA.
  16 // The alpha is set to 255, allowing the application to use RGBA or RGB32.
  17
  18 #include "media/base/yuv_convert.h"
  19
  20 #include "base/cpu.h"
  21 #include "base/logging.h"
  22 #include "base/memory/scoped_ptr.h"
  23 #include "build/build_config.h"
  24 #include "media/base/simd/convert_rgb_to_yuv.h"
  25 #include "media/base/simd/convert_yuv_to_rgb.h"
  26 #include "media/base/simd/filter_yuv.h"
  27
  28 #if defined(ARCH_CPU_X86_FAMILY)
  29 #if defined(COMPILER_MSVC)
  30 #include <intrin.h>
  31 #else
  32 #include <mmintrin.h>
  33 #endif
  34 #endif
  35
  36 // Assembly functions are declared without namespace.
  37 extern "C" {
  38 void EmptyRegisterState_MMX();
  39 }  // extern "C"
  40
  41 namespace media {
  42
  43 static FilterYUVRowsProc ChooseFilterYUVRowsProc() {
  44 #if defined(ARCH_CPU_X86_FAMILY)
  45   base::CPU cpu;
  46   if (cpu.has_sse2())
  47     return &FilterYUVRows_SSE2;
  48
  49 #if defined(MEDIA_MMX_INTRINSICS_AVAILABLE)
  50   if (cpu.has_mmx())
  51     return &FilterYUVRows_MMX;
  52 #endif  // defined(MEDIA_MMX_INTRINSICS_AVAILABLE)
  53 #endif  // defined(ARCH_CPU_X86_FAMILY)
  54   return &FilterYUVRows_C;
  55 }
  56
  57 static ConvertYUVToRGB32RowProc ChooseConvertYUVToRGB32RowProc() {
  58 #if defined(ARCH_CPU_X86_FAMILY)
  59   base::CPU cpu;
  60   if (cpu.has_sse())
  61     return &ConvertYUVToRGB32Row_SSE;
  62   if (cpu.has_mmx())
  63     return &ConvertYUVToRGB32Row_MMX;
  64 #endif
  65   return &ConvertYUVToRGB32Row_C;
  66 }
  67
  68 static ScaleYUVToRGB32RowProc ChooseScaleYUVToRGB32RowProc() {
  69 #if defined(ARCH_CPU_X86_64)
  70   // Use 64-bits version if possible.
  71   return &ScaleYUVToRGB32Row_SSE2_X64;
  72 #elif defined(ARCH_CPU_X86_FAMILY)
  73   base::CPU cpu;
  74   // Choose the best one on 32-bits system.
  75   if (cpu.has_sse())
  76     return &ScaleYUVToRGB32Row_SSE;
  77   if (cpu.has_mmx())
  78     return &ScaleYUVToRGB32Row_MMX;
  79 #endif  // defined(ARCH_CPU_X86_64)
  80   return &ScaleYUVToRGB32Row_C;
  81 }
  82
  83 static ScaleYUVToRGB32RowProc ChooseLinearScaleYUVToRGB32RowProc() {
  84 #if defined(ARCH_CPU_X86_64)
  85   // Use 64-bits version if possible.
  86   return &LinearScaleYUVToRGB32Row_MMX_X64;
  87 #elif defined(ARCH_CPU_X86_FAMILY)
  88   base::CPU cpu;
  89   // 32-bits system.
  90   if (cpu.has_sse())
  91     return &LinearScaleYUVToRGB32Row_SSE;
  92   if (cpu.has_mmx())
  93     return &LinearScaleYUVToRGB32Row_MMX;
  94 #endif  // defined(ARCH_CPU_X86_64)
  95   return &LinearScaleYUVToRGB32Row_C;
  96 }
  97
  98 // Empty SIMD registers state after using them.
  99 void EmptyRegisterState() {
 100 #if defined(ARCH_CPU_X86_FAMILY)
 101   static bool checked = false;
 102   static bool has_mmx = false;
 103   if (!checked) {
 104     base::CPU cpu;
 105     has_mmx = cpu.has_mmx();
 106     checked = true;
 107   }
 108
 109   if (has_mmx) {
 110 #if defined(MEDIA_MMX_INTRINSICS_AVAILABLE)
 111     _mm_empty();
 112 #else
 113     EmptyRegisterState_MMX();
 114 #endif  // defined(MEDIA_MMX_INTRINSICS_AVAILABLE)
 115   }
 116
 117 #endif  // defined(ARCH_CPU_X86_FAMILY)
 118 }
 119
 120 // 16.16 fixed point arithmetic
 121 const int kFractionBits = 16;
 122 const int kFractionMax = 1 << kFractionBits;
 123 const int kFractionMask = ((1 << kFractionBits) - 1);
 124
 125 // Scale a frame of YUV to 32 bit ARGB.
 126 void ScaleYUVToRGB32(const uint8* y_buf,
 127                      const uint8* u_buf,
 128                      const uint8* v_buf,
 129                      uint8* rgb_buf,
 130                      int source_width,
 131                      int source_height,
 132                      int width,
 133                      int height,
 134                      int y_pitch,
 135                      int uv_pitch,
 136                      int rgb_pitch,
 137                      YUVType yuv_type,
 138                      Rotate view_rotate,
 139                      ScaleFilter filter) {
 140   static FilterYUVRowsProc filter_proc = NULL;
 141   static ConvertYUVToRGB32RowProc convert_proc = NULL;
 142   static ScaleYUVToRGB32RowProc scale_proc = NULL;
 143   static ScaleYUVToRGB32RowProc linear_scale_proc = NULL;
 144
 145   if (!filter_proc)
 146     filter_proc = ChooseFilterYUVRowsProc();
 147   if (!convert_proc)
 148     convert_proc = ChooseConvertYUVToRGB32RowProc();
 149   if (!scale_proc)
 150     scale_proc = ChooseScaleYUVToRGB32RowProc();
 151   if (!linear_scale_proc)
 152     linear_scale_proc = ChooseLinearScaleYUVToRGB32RowProc();
 153
 154   // Handle zero sized sources and destinations.
 155   if ((yuv_type == YV12 && (source_width < 2 || source_height < 2)) ||
 156       (yuv_type == YV16 && (source_width < 2 || source_height < 1)) ||
 157       width == 0 || height == 0)
 158     return;
 159
 160   // 4096 allows 3 buffers to fit in 12k.
 161   // Helps performance on CPU with 16K L1 cache.
 162   // Large enough for 3830x2160 and 30" displays which are 2560x1600.
 163   const int kFilterBufferSize = 4096;
 164   // Disable filtering if the screen is too big (to avoid buffer overflows).
 165   // This should never happen to regular users: they don't have monitors
 166   // wider than 4096 pixels.
 167   // TODO(fbarchard): Allow rotated videos to filter.
 168   if (source_width > kFilterBufferSize || view_rotate)
 169     filter = FILTER_NONE;
 170
 171   unsigned int y_shift = yuv_type;
 172   // Diagram showing origin and direction of source sampling.
 173   // ->0   4<-
 174   // 7       3
 175   //
 176   // 6       5
 177   // ->1   2<-
 178   // Rotations that start at right side of image.
 179   if ((view_rotate == ROTATE_180) ||
 180       (view_rotate == ROTATE_270) ||
 181       (view_rotate == MIRROR_ROTATE_0) ||
 182       (view_rotate == MIRROR_ROTATE_90)) {
 183     y_buf += source_width - 1;
 184     u_buf += source_width / 2 - 1;
 185     v_buf += source_width / 2 - 1;
 186     source_width = -source_width;
 187   }
 188   // Rotations that start at bottom of image.
 189   if ((view_rotate == ROTATE_90) ||
 190       (view_rotate == ROTATE_180) ||
 191       (view_rotate == MIRROR_ROTATE_90) ||
 192       (view_rotate == MIRROR_ROTATE_180)) {
 193     y_buf += (source_height - 1) * y_pitch;
 194     u_buf += ((source_height >> y_shift) - 1) * uv_pitch;
 195     v_buf += ((source_height >> y_shift) - 1) * uv_pitch;
 196     source_height = -source_height;
 197   }
 198
 199   int source_dx = source_width * kFractionMax / width;
 200
 201   if ((view_rotate == ROTATE_90) ||
 202       (view_rotate == ROTATE_270)) {
 203     int tmp = height;
 204     height = width;
 205     width = tmp;
 206     tmp = source_height;
 207     source_height = source_width;
 208     source_width = tmp;
 209     int source_dy = source_height * kFractionMax / height;
 210     source_dx = ((source_dy >> kFractionBits) * y_pitch) << kFractionBits;
 211     if (view_rotate == ROTATE_90) {
 212       y_pitch = -1;
 213       uv_pitch = -1;
 214       source_height = -source_height;
 215     } else {
 216       y_pitch = 1;
 217       uv_pitch = 1;
 218     }
 219   }
 220
 221   // Need padding because FilterRows() will write 1 to 16 extra pixels
 222   // after the end for SSE2 version.
 223   uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16];
 224   uint8* ybuf =
 225       reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15);
 226   uint8* ubuf = ybuf + kFilterBufferSize;
 227   uint8* vbuf = ubuf + kFilterBufferSize;
 228
 229   // TODO(fbarchard): Fixed point math is off by 1 on negatives.
 230
 231   // We take a y-coordinate in [0,1] space in the source image space, and
 232   // transform to a y-coordinate in [0,1] space in the destination image space.
 233   // Note that the coordinate endpoints lie on pixel boundaries, not on pixel
 234   // centers: e.g. a two-pixel-high image will have pixel centers at 0.25 and
 235   // 0.75.  The formula is as follows (in fixed-point arithmetic):
 236   //   y_dst = dst_height * ((y_src + 0.5) / src_height)
 237   //   dst_pixel = clamp([0, dst_height - 1], floor(y_dst - 0.5))
 238   // Implement this here as an accumulator + delta, to avoid expensive math
 239   // in the loop.
 240   int source_y_subpixel_accum =
 241     ((kFractionMax / 2) * source_height) / height - (kFractionMax / 2);
 242   int source_y_subpixel_delta = ((1 << kFractionBits) * source_height) / height;
 243
 244   // TODO(fbarchard): Split this into separate function for better efficiency.
 245   for (int y = 0; y < height; ++y) {
 246     uint8* dest_pixel = rgb_buf + y * rgb_pitch;
 247     int source_y_subpixel = source_y_subpixel_accum;
 248     source_y_subpixel_accum += source_y_subpixel_delta;
 249     if (source_y_subpixel < 0)
 250       source_y_subpixel = 0;
 251     else if (source_y_subpixel > ((source_height - 1) << kFractionBits))
 252       source_y_subpixel = (source_height - 1) << kFractionBits;
 253
 254     const uint8* y_ptr = NULL;
 255     const uint8* u_ptr = NULL;
 256     const uint8* v_ptr = NULL;
 257     // Apply vertical filtering if necessary.
 258     // TODO(fbarchard): Remove memcpy when not necessary.
 259     if (filter & media::FILTER_BILINEAR_V) {
 260       int source_y = source_y_subpixel >> kFractionBits;
 261       y_ptr = y_buf + source_y * y_pitch;
 262       u_ptr = u_buf + (source_y >> y_shift) * uv_pitch;
 263       v_ptr = v_buf + (source_y >> y_shift) * uv_pitch;
 264
 265       // Vertical scaler uses 16.8 fixed point.
 266       int source_y_fraction =
 267           (source_y_subpixel & kFractionMask) >> 8;
 268       if (source_y_fraction != 0) {
 269         filter_proc(ybuf, y_ptr, y_ptr + y_pitch, source_width,
 270                     source_y_fraction);
 271       } else {
 272         memcpy(ybuf, y_ptr, source_width);
 273       }
 274       y_ptr = ybuf;
 275       ybuf[source_width] = ybuf[source_width-1];
 276
 277       int uv_source_width = (source_width + 1) / 2;
 278       int source_uv_fraction;
 279
 280       // For formats with half-height UV planes, each even-numbered pixel row
 281       // should not interpolate, since the next row to interpolate from should
 282       // be a duplicate of the current row.
 283       if (y_shift && (source_y & 0x1) == 0)
 284         source_uv_fraction = 0;
 285       else
 286         source_uv_fraction = source_y_fraction;
 287
 288       if (source_uv_fraction != 0) {
 289         filter_proc(ubuf, u_ptr, u_ptr + uv_pitch, uv_source_width,
 290             source_uv_fraction);
 291         filter_proc(vbuf, v_ptr, v_ptr + uv_pitch, uv_source_width,
 292             source_uv_fraction);
 293       } else {
 294         memcpy(ubuf, u_ptr, uv_source_width);
 295         memcpy(vbuf, v_ptr, uv_source_width);
 296       }
 297       u_ptr = ubuf;
 298       v_ptr = vbuf;
 299       ubuf[uv_source_width] = ubuf[uv_source_width - 1];
 300       vbuf[uv_source_width] = vbuf[uv_source_width - 1];
 301     } else {
 302       // Offset by 1/2 pixel for center sampling.
 303       int source_y = (source_y_subpixel + (kFractionMax / 2)) >> kFractionBits;
 304       y_ptr = y_buf + source_y * y_pitch;
 305       u_ptr = u_buf + (source_y >> y_shift) * uv_pitch;
 306       v_ptr = v_buf + (source_y >> y_shift) * uv_pitch;
 307     }
 308     if (source_dx == kFractionMax) {  // Not scaled
 309       convert_proc(y_ptr, u_ptr, v_ptr, dest_pixel, width);
 310     } else {
 311       if (filter & FILTER_BILINEAR_H) {
 312         linear_scale_proc(y_ptr, u_ptr, v_ptr, dest_pixel, width, source_dx);
 313       } else {
 314         scale_proc(y_ptr, u_ptr, v_ptr, dest_pixel, width, source_dx);
 315       }
 316     }
 317   }
 318
 319   EmptyRegisterState();
 320 }
 321
 322 // Scale a frame of YV12 to 32 bit ARGB for a specific rectangle.
 323 void ScaleYUVToRGB32WithRect(const uint8* y_buf,
 324                              const uint8* u_buf,
 325                              const uint8* v_buf,
 326                              uint8* rgb_buf,
 327                              int source_width,
 328                              int source_height,
 329                              int dest_width,
 330                              int dest_height,
 331                              int dest_rect_left,
 332                              int dest_rect_top,
 333                              int dest_rect_right,
 334                              int dest_rect_bottom,
 335                              int y_pitch,
 336                              int uv_pitch,
 337                              int rgb_pitch) {
 338   static FilterYUVRowsProc filter_proc = NULL;
 339   if (!filter_proc)
 340     filter_proc = ChooseFilterYUVRowsProc();
 341
 342   // This routine doesn't currently support up-scaling.
 343   CHECK_LE(dest_width, source_width);
 344   CHECK_LE(dest_height, source_height);
 345
 346   // Sanity-check the destination rectangle.
 347   DCHECK(dest_rect_left >= 0 && dest_rect_right <= dest_width);
 348   DCHECK(dest_rect_top >= 0 && dest_rect_bottom <= dest_height);
 349   DCHECK(dest_rect_right > dest_rect_left);
 350   DCHECK(dest_rect_bottom > dest_rect_top);
 351
 352   // Fixed-point value of vertical and horizontal scale down factor.
 353   // Values are in the format 16.16.
 354   int y_step = kFractionMax * source_height / dest_height;
 355   int x_step = kFractionMax * source_width / dest_width;
 356
 357   // Determine the coordinates of the rectangle in 16.16 coords.
 358   // NB: Our origin is the *center* of the top/left pixel, NOT its top/left.
 359   // If we're down-scaling by more than a factor of two, we start with a 50%
 360   // fraction to avoid degenerating to point-sampling - we should really just
 361   // fix the fraction at 50% for all pixels in that case.
 362   int source_left = dest_rect_left * x_step;
 363   int source_right = (dest_rect_right - 1) * x_step;
 364   if (x_step < kFractionMax * 2) {
 365     source_left += ((x_step - kFractionMax) / 2);
 366     source_right += ((x_step - kFractionMax) / 2);
 367   } else {
 368     source_left += kFractionMax / 2;
 369     source_right += kFractionMax / 2;
 370   }
 371   int source_top = dest_rect_top * y_step;
 372   if (y_step < kFractionMax * 2) {
 373     source_top += ((y_step - kFractionMax) / 2);
 374   } else {
 375     source_top += kFractionMax / 2;
 376   }
 377
 378   // Determine the parts of the Y, U and V buffers to interpolate.
 379   int source_y_left = source_left >> kFractionBits;
 380   int source_y_right = std::min(
 381       (source_right >> kFractionBits) + 2,
 382       source_width + 1);
 383
 384   int source_uv_left = source_y_left / 2;
 385   int source_uv_right = std::min(
 386       (source_right >> (kFractionBits + 1)) + 2,
 387       (source_width + 1) / 2);
 388
 389   int source_y_width = source_y_right - source_y_left;
 390   int source_uv_width = source_uv_right - source_uv_left;
 391
 392   // Determine number of pixels in each output row.
 393   int dest_rect_width = dest_rect_right - dest_rect_left;
 394
 395   // Intermediate buffer for vertical interpolation.
 396   // 4096 bytes allows 3 buffers to fit in 12k, which fits in a 16K L1 cache,
 397   // and is bigger than most users will generally need.
 398   // The buffer is 16-byte aligned and padded with 16 extra bytes; some of the
 399   // FilterYUVRowProcs have alignment requirements, and the SSE version can
 400   // write up to 16 bytes past the end of the buffer.
 401   const int kFilterBufferSize = 4096;
 402   if (source_width > kFilterBufferSize)
 403     filter_proc = NULL;
 404   uint8 yuv_temp[16 + kFilterBufferSize * 3 + 16];
 405   uint8* y_temp =
 406       reinterpret_cast<uint8*>(
 407           reinterpret_cast<uintptr_t>(yuv_temp + 15) & ~15);
 408   uint8* u_temp = y_temp + kFilterBufferSize;
 409   uint8* v_temp = u_temp + kFilterBufferSize;
 410
 411   // Move to the top-left pixel of output.
 412   rgb_buf += dest_rect_top * rgb_pitch;
 413   rgb_buf += dest_rect_left * 4;
 414
 415   // For each destination row perform interpolation and color space
 416   // conversion to produce the output.
 417   for (int row = dest_rect_top; row < dest_rect_bottom; ++row) {
 418     // Round the fixed-point y position to get the current row.
 419     int source_row = source_top >> kFractionBits;
 420     int source_uv_row = source_row / 2;
 421     DCHECK(source_row < source_height);
 422
 423     // Locate the first row for each plane for interpolation.
 424     const uint8* y0_ptr = y_buf + y_pitch * source_row + source_y_left;
 425     const uint8* u0_ptr = u_buf + uv_pitch * source_uv_row + source_uv_left;
 426     const uint8* v0_ptr = v_buf + uv_pitch * source_uv_row + source_uv_left;
 427     const uint8* y1_ptr = NULL;
 428     const uint8* u1_ptr = NULL;
 429     const uint8* v1_ptr = NULL;
 430
 431     // Locate the second row for interpolation, being careful not to overrun.
 432     if (source_row + 1 >= source_height) {
 433       y1_ptr = y0_ptr;
 434     } else {
 435       y1_ptr = y0_ptr + y_pitch;
 436     }
 437     if (source_uv_row + 1 >= (source_height + 1) / 2) {
 438       u1_ptr = u0_ptr;
 439       v1_ptr = v0_ptr;
 440     } else {
 441       u1_ptr = u0_ptr + uv_pitch;
 442       v1_ptr = v0_ptr + uv_pitch;
 443     }
 444
 445     if (filter_proc) {
 446       // Vertical scaler uses 16.8 fixed point.
 447       int fraction = (source_top & kFractionMask) >> 8;
 448       filter_proc(y_temp + source_y_left, y0_ptr, y1_ptr,
 449                   source_y_width, fraction);
 450       filter_proc(u_temp + source_uv_left, u0_ptr, u1_ptr,
 451                   source_uv_width, fraction);
 452       filter_proc(v_temp + source_uv_left, v0_ptr, v1_ptr,
 453                   source_uv_width, fraction);
 454
 455       // Perform horizontal interpolation and color space conversion.
 456       // TODO(hclam): Use the MMX version after more testing.
 457       LinearScaleYUVToRGB32RowWithRange_C(
 458           y_temp, u_temp, v_temp, rgb_buf,
 459           dest_rect_width, source_left, x_step);
 460     } else {
 461       // If the frame is too large then we linear scale a single row.
 462       LinearScaleYUVToRGB32RowWithRange_C(
 463           y0_ptr, u0_ptr, v0_ptr, rgb_buf,
 464           dest_rect_width, source_left, x_step);
 465     }
 466
 467     // Advance vertically in the source and destination image.
 468     source_top += y_step;
 469     rgb_buf += rgb_pitch;
 470   }
 471
 472   EmptyRegisterState();
 473 }
 474
 475 void ConvertRGB32ToYUV(const uint8* rgbframe,
 476                        uint8* yplane,
 477                        uint8* uplane,
 478                        uint8* vplane,
 479                        int width,
 480                        int height,
 481                        int rgbstride,
 482                        int ystride,
 483                        int uvstride) {
 484   static void (*convert_proc)(const uint8*, uint8*, uint8*, uint8*,
 485                               int, int, int, int, int) = NULL;
 486   if (!convert_proc) {
 487 #if defined(ARCH_CPU_ARM_FAMILY) || defined(ARCH_CPU_MIPS_FAMILY)
 488     // For ARM and MIPS processors, always use C version.
 489     // TODO(hclam): Implement a NEON version.
 490     convert_proc = &ConvertRGB32ToYUV_C;
 491 #else
 492     // TODO(hclam): Switch to SSSE3 version when the cyan problem is solved.
 493     // See: crbug.com/100462
 494     base::CPU cpu;
 495     if (cpu.has_sse2())
 496       convert_proc = &ConvertRGB32ToYUV_SSE2;
 497     else
 498       convert_proc = &ConvertRGB32ToYUV_C;
 499 #endif
 500   }
 501
 502   convert_proc(rgbframe, yplane, uplane, vplane, width, height,
 503                rgbstride, ystride, uvstride);
 504 }
 505
 506 void ConvertRGB24ToYUV(const uint8* rgbframe,
 507                        uint8* yplane,
 508                        uint8* uplane,
 509                        uint8* vplane,
 510                        int width,
 511                        int height,
 512                        int rgbstride,
 513                        int ystride,
 514                        int uvstride) {
 515 #if defined(ARCH_CPU_ARM_FAMILY) || defined(ARCH_CPU_MIPS_FAMILY)
 516   ConvertRGB24ToYUV_C(rgbframe, yplane, uplane, vplane, width, height,
 517                       rgbstride, ystride, uvstride);
 518 #else
 519   static void (*convert_proc)(const uint8*, uint8*, uint8*, uint8*,
 520                               int, int, int, int, int) = NULL;
 521   if (!convert_proc) {
 522     base::CPU cpu;
 523     if (cpu.has_ssse3())
 524       convert_proc = &ConvertRGB24ToYUV_SSSE3;
 525     else
 526       convert_proc = &ConvertRGB24ToYUV_C;
 527   }
 528   convert_proc(rgbframe, yplane, uplane, vplane, width, height,
 529                rgbstride, ystride, uvstride);
 530 #endif
 531 }
 532
 533 void ConvertYUY2ToYUV(const uint8* src,
 534                       uint8* yplane,
 535                       uint8* uplane,
 536                       uint8* vplane,
 537                       int width,
 538                       int height) {
 539   for (int i = 0; i < height / 2; ++i) {
 540     for (int j = 0; j < (width / 2); ++j) {
 541       yplane[0] = src[0];
 542       *uplane = src[1];
 543       yplane[1] = src[2];
 544       *vplane = src[3];
 545       src += 4;
 546       yplane += 2;
 547       uplane++;
 548       vplane++;
 549     }
 550     for (int j = 0; j < (width / 2); ++j) {
 551       yplane[0] = src[0];
 552       yplane[1] = src[2];
 553       src += 4;
 554       yplane += 2;
 555     }
 556   }
 557 }
 558
 559 void ConvertNV21ToYUV(const uint8* src,
 560                       uint8* yplane,
 561                       uint8* uplane,
 562                       uint8* vplane,
 563                       int width,
 564                       int height) {
 565   int y_plane_size = width * height;
 566   memcpy(yplane, src, y_plane_size);
 567
 568   src += y_plane_size;
 569   int u_plane_size = y_plane_size >> 2;
 570   for (int i = 0; i < u_plane_size; ++i) {
 571     *vplane++ = *src++;
 572     *uplane++ = *src++;
 573   }
 574 }
 575
 576 void ConvertYUVToRGB32(const uint8* yplane,
 577                        const uint8* uplane,
 578                        const uint8* vplane,
 579                        uint8* rgbframe,
 580                        int width,
 581                        int height,
 582                        int ystride,
 583                        int uvstride,
 584                        int rgbstride,
 585                        YUVType yuv_type) {
 586 #if defined(ARCH_CPU_ARM_FAMILY) || defined(ARCH_CPU_MIPS_FAMILY)
 587   ConvertYUVToRGB32_C(yplane, uplane, vplane, rgbframe,
 588                       width, height, ystride, uvstride, rgbstride, yuv_type);
 589 #else
 590   static ConvertYUVToRGB32Proc convert_proc = NULL;
 591   if (!convert_proc) {
 592     base::CPU cpu;
 593     if (cpu.has_sse())
 594       convert_proc = &ConvertYUVToRGB32_SSE;
 595     else if (cpu.has_mmx())
 596       convert_proc = &ConvertYUVToRGB32_MMX;
 597     else
 598       convert_proc = &ConvertYUVToRGB32_C;
 599   }
 600
 601   convert_proc(yplane, uplane, vplane, rgbframe,
 602                width, height, ystride, uvstride, rgbstride, yuv_type);
 603 #endif
 604 }
 605
 606 void ConvertYUVAToARGB(const uint8* yplane,
 607                        const uint8* uplane,
 608                        const uint8* vplane,
 609                        const uint8* aplane,
 610                        uint8* rgbframe,
 611                        int width,
 612                        int height,
 613                        int ystride,
 614                        int uvstride,
 615                        int astride,
 616                        int rgbstride,
 617                        YUVType yuv_type) {
 618 #if defined(ARCH_CPU_ARM_FAMILY) || defined(ARCH_CPU_MIPS_FAMILY)
 619   ConvertYUVAToARGB_C(yplane, uplane, vplane, aplane, rgbframe,
 620                       width, height, ystride, uvstride, astride, rgbstride,
 621                       yuv_type);
 622 #else
 623   static ConvertYUVAToARGBProc convert_proc = NULL;
 624   if (!convert_proc) {
 625     base::CPU cpu;
 626     if (cpu.has_mmx())
 627       convert_proc = &ConvertYUVAToARGB_MMX;
 628     else
 629       convert_proc = &ConvertYUVAToARGB_C;
 630   }
 631   convert_proc(yplane, uplane, vplane, aplane, rgbframe,
 632                width, height, ystride, uvstride, astride, rgbstride, yuv_type);
 633 #endif
 634 }
 635
 636 }  // namespace media