Adding Peter Thatcher to the owners file.
[chromium-blink-merge.git] / gpu / perftests / texture_upload_perftest.cc
blob3a1af627acd344cb5e79dae661b594da677437e9
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <algorithm>
6 #include <vector>
8 #include "base/containers/small_map.h"
9 #include "base/logging.h"
10 #include "base/memory/ref_counted.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/strings/stringprintf.h"
13 #include "gpu/perftests/measurements.h"
14 #include "testing/gmock/include/gmock/gmock.h"
15 #include "testing/gtest/include/gtest/gtest.h"
16 #include "testing/perf/perf_test.h"
17 #include "ui/gfx/geometry/size.h"
18 #include "ui/gfx/geometry/vector2d_f.h"
19 #include "ui/gl/gl_bindings.h"
20 #include "ui/gl/gl_context.h"
21 #include "ui/gl/gl_enums.h"
22 #include "ui/gl/gl_surface.h"
23 #include "ui/gl/gl_version_info.h"
24 #include "ui/gl/gpu_timing.h"
25 #include "ui/gl/scoped_make_current.h"
27 namespace gpu {
28 namespace {
// Number of initial upload-and-draw iterations whose measurements are
// discarded before aggregation starts.
const int kUploadPerfWarmupRuns = 5;
// Number of upload-and-draw iterations executed after the warm-up phase.
const int kUploadPerfTestRuns = 30;

// Turns its argument into a string literal so that GLSL can be written inline
// without quoting every line. Runs of whitespace between tokens collapse into
// a single space.
#define SHADER(source) #source

// clang-format off
// Vertex shader: shifts every vertex by the |translation| uniform and forwards
// the texture coordinate to the fragment stage.
const char kVertexShader[] =
SHADER(
  uniform vec2 translation;
  attribute vec2 a_position;
  attribute vec2 a_texCoord;
  varying vec2 v_texCoord;
  void main() {
    gl_Position = vec4(
      translation.x + a_position.x, translation.y + a_position.y, 0.0, 1.0);
    v_texCoord = a_texCoord;
  }
);
// Default float precision statement; it is prepended to the fragment shader
// when the GL implementation is EGL/GLES2.
const char kShaderDefaultFloatPrecision[] =
SHADER(
  precision mediump float;
);
// Fragment shader: samples |a_texture| at the interpolated coordinate.
const char kFragmentShader[] =
SHADER(
  uniform sampler2D a_texture;
  varying vec2 v_texCoord;
  void main() {
    gl_FragColor = texture2D(a_texture, v_texCoord);
  }
);
// clang-format on
// Reads the current GL error with glGetError() and CHECK-fails if it is not
// GL_NO_ERROR. |msg| is appended to the failure message; callers in this file
// pass the name of the GL call that was just issued.
void CheckNoGlError(const std::string& msg) {
  CHECK_EQ(static_cast<GLenum>(GL_NO_ERROR), glGetError()) << " " << msg;
}
66 // Utility function to compile a shader from a string.
67 GLuint LoadShader(const GLenum type, const char* const src) {
68 GLuint shader = 0;
69 shader = glCreateShader(type);
70 CHECK_NE(0u, shader);
71 glShaderSource(shader, 1, &src, NULL);
72 glCompileShader(shader);
74 GLint compiled = 0;
75 glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
76 if (compiled == 0) {
77 GLint len = 0;
78 glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &len);
79 if (len > 1) {
80 scoped_ptr<char[]> error_log(new char[len]);
81 glGetShaderInfoLog(shader, len, NULL, error_log.get());
82 LOG(ERROR) << "Error compiling shader: " << error_log.get();
85 CHECK_NE(0, compiled);
86 return shader;
89 int GLFormatBytePerPixel(GLenum format) {
90 DCHECK(format == GL_RGBA || format == GL_LUMINANCE || format == GL_RED_EXT);
91 return format == GL_RGBA ? 4 : 1;
94 GLenum GLFormatToInternalFormat(GLenum format) {
95 return format == GL_RED ? GL_R8 : format;
98 GLenum GLFormatToStorageFormat(GLenum format) {
99 switch (format) {
100 case GL_RGBA:
101 return GL_RGBA8;
102 case GL_LUMINANCE:
103 return GL_LUMINANCE8;
104 case GL_RED:
105 return GL_R8;
106 default:
107 NOTREACHED();
109 return 0;
112 void GenerateTextureData(const gfx::Size& size,
113 int bytes_per_pixel,
114 const int seed,
115 std::vector<uint8>* const pixels) {
116 // Row bytes has to be multiple of 4 (GL_PACK_ALIGNMENT defaults to 4).
117 int stride = ((size.width() * bytes_per_pixel) + 3) & ~0x3;
118 pixels->resize(size.height() * stride);
119 for (int y = 0; y < size.height(); ++y) {
120 for (int x = 0; x < size.width(); ++x) {
121 for (int channel = 0; channel < bytes_per_pixel; ++channel) {
122 int index = y * stride + x * bytes_per_pixel;
123 pixels->at(index) = (index + (seed << 2)) % (0x20 << channel);
129 // Compare a buffer containing pixels in a specified format to GL_RGBA buffer
130 // where the former buffer have been uploaded as a texture and drawn on the
131 // RGBA buffer.
132 bool CompareBufferToRGBABuffer(GLenum format,
133 const gfx::Size& size,
134 const std::vector<uint8>& pixels,
135 const std::vector<uint8>& rgba) {
136 int bytes_per_pixel = GLFormatBytePerPixel(format);
137 int pixels_stride = ((size.width() * bytes_per_pixel) + 3) & ~0x3;
138 int rgba_stride = size.width() * GLFormatBytePerPixel(GL_RGBA);
139 for (int y = 0; y < size.height(); ++y) {
140 for (int x = 0; x < size.width(); ++x) {
141 int rgba_index = y * rgba_stride + x * GLFormatBytePerPixel(GL_RGBA);
142 int pixels_index = y * pixels_stride + x * bytes_per_pixel;
143 uint8 expected[4] = {0};
144 switch (format) {
145 case GL_LUMINANCE: // (L_t, L_t, L_t, 1)
146 expected[1] = pixels[pixels_index];
147 expected[2] = pixels[pixels_index];
148 case GL_RED: // (R_t, 0, 0, 1)
149 expected[0] = pixels[pixels_index];
150 expected[3] = 255;
151 break;
152 case GL_RGBA: // (R_t, G_t, B_t, A_t)
153 memcpy(expected, &pixels[pixels_index], 4);
154 break;
155 default:
156 NOTREACHED();
158 if (memcmp(&rgba[rgba_index], expected, 4)) {
159 return false;
163 return true;
166 // PerfTest to check costs of texture upload at different stages
167 // on different platforms.
168 class TextureUploadPerfTest : public testing::Test {
169 public:
170 TextureUploadPerfTest() : fbo_size_(1024, 1024) {}
172 // Overridden from testing::Test
173 void SetUp() override {
174 static bool gl_initialized = gfx::GLSurface::InitializeOneOff();
175 DCHECK(gl_initialized);
176 // Initialize an offscreen surface and a gl context.
177 surface_ = gfx::GLSurface::CreateOffscreenGLSurface(gfx::Size(4, 4));
178 gl_context_ = gfx::GLContext::CreateGLContext(NULL, // share_group
179 surface_.get(),
180 gfx::PreferIntegratedGpu);
181 ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
182 glGenTextures(1, &color_texture_);
183 glBindTexture(GL_TEXTURE_2D, color_texture_);
184 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
185 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
186 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
187 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
188 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, fbo_size_.width(),
189 fbo_size_.height(), 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
191 glGenFramebuffersEXT(1, &framebuffer_object_);
192 glBindFramebufferEXT(GL_FRAMEBUFFER, framebuffer_object_);
194 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
195 GL_TEXTURE_2D, color_texture_, 0);
196 DCHECK_EQ(static_cast<GLenum>(GL_FRAMEBUFFER_COMPLETE),
197 glCheckFramebufferStatusEXT(GL_FRAMEBUFFER));
199 glViewport(0, 0, fbo_size_.width(), fbo_size_.height());
200 gpu_timing_client_ = gl_context_->CreateGPUTimingClient();
202 if (gpu_timing_client_->IsAvailable()) {
203 LOG(INFO) << "Gpu timing initialized with timer type: "
204 << gpu_timing_client_->GetTimerTypeName();
205 gpu_timing_client_->InvalidateTimerOffset();
206 } else {
207 LOG(WARNING) << "Can't initialize gpu timing";
209 // Prepare a simple program and a vertex buffer that will be
210 // used to draw a quad on the offscreen surface.
211 vertex_shader_ = LoadShader(GL_VERTEX_SHADER, kVertexShader);
213 bool is_gles = gfx::GetGLImplementation() == gfx::kGLImplementationEGLGLES2;
214 fragment_shader_ = LoadShader(
215 GL_FRAGMENT_SHADER,
216 base::StringPrintf("%s%s", is_gles ? kShaderDefaultFloatPrecision : "",
217 kFragmentShader).c_str());
218 program_object_ = glCreateProgram();
219 CHECK_NE(0u, program_object_);
221 glAttachShader(program_object_, vertex_shader_);
222 glAttachShader(program_object_, fragment_shader_);
223 glBindAttribLocation(program_object_, 0, "a_position");
224 glBindAttribLocation(program_object_, 1, "a_texCoord");
225 glLinkProgram(program_object_);
227 GLint linked = -1;
228 glGetProgramiv(program_object_, GL_LINK_STATUS, &linked);
229 CHECK_NE(0, linked);
230 glUseProgram(program_object_);
231 glUniform1i(sampler_location_, 0);
232 translation_location_ =
233 glGetUniformLocation(program_object_, "translation");
234 DCHECK_NE(-1, translation_location_);
235 glUniform2f(translation_location_, 0.0f, 0.0f);
237 sampler_location_ = glGetUniformLocation(program_object_, "a_texture");
238 CHECK_NE(-1, sampler_location_);
240 glGenBuffersARB(1, &vertex_buffer_);
241 CHECK_NE(0u, vertex_buffer_);
242 DCHECK_NE(0u, vertex_buffer_);
243 glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
244 glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4, 0);
245 glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4,
246 reinterpret_cast<void*>(sizeof(GLfloat) * 2));
247 glEnableVertexAttribArray(0);
248 glEnableVertexAttribArray(1);
249 CheckNoGlError("glEnableVertexAttribArray");
251 has_texture_storage_ =
252 gl_context_->GetVersionInfo()->is_es3 ||
253 gl_context_->HasExtension("GL_EXT_texture_storage") ||
254 gl_context_->HasExtension("GL_ARB_texture_storage");
257 void GenerateVertexBuffer(const gfx::Size& size) {
258 DCHECK_NE(0u, vertex_buffer_);
259 glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
260 // right and top are in clipspace
261 float right = -1.f + 2.f * size.width() / fbo_size_.width();
262 float top = -1.f + 2.f * size.height() / fbo_size_.height();
263 // Four vertexes, one per line. Each vertex has two components per
264 // position and two per texcoord.
265 // It represents a quad formed by two triangles if interpreted
266 // as a tristrip.
268 // clang-format off
269 GLfloat data[16] = {
270 -1.f, -1.f, 0.f, 0.f,
271 right, -1.f, 1.f, 0.f,
272 -1.f, top, 0.f, 1.f,
273 right, top, 1.f, 1.f};
274 // clang-format on
275 glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
276 CheckNoGlError("glBufferData");
279 void TearDown() override {
280 ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
281 glDeleteProgram(program_object_);
282 glDeleteShader(vertex_shader_);
283 glDeleteShader(fragment_shader_);
284 glDeleteBuffersARB(1, &vertex_buffer_);
286 glBindFramebufferEXT(GL_FRAMEBUFFER, 0);
287 glDeleteFramebuffersEXT(1, &framebuffer_object_);
288 glDeleteTextures(1, &color_texture_);
289 CheckNoGlError("glDeleteTextures");
291 gpu_timing_client_ = nullptr;
292 gl_context_ = nullptr;
293 surface_ = nullptr;
296 protected:
297 GLuint CreateGLTexture(const GLenum format,
298 const gfx::Size& size,
299 const bool specify_storage) {
300 GLuint texture_id = 0;
301 glActiveTexture(GL_TEXTURE0);
302 glGenTextures(1, &texture_id);
303 glBindTexture(GL_TEXTURE_2D, texture_id);
304 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
305 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
306 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
307 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
308 if (specify_storage) {
309 if (has_texture_storage_) {
310 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GLFormatToStorageFormat(format),
311 size.width(), size.height());
312 CheckNoGlError("glTexStorage2DEXT");
313 } else {
314 glTexImage2D(GL_TEXTURE_2D, 0, GLFormatToInternalFormat(format),
315 size.width(), size.height(), 0, format, GL_UNSIGNED_BYTE,
316 nullptr);
317 CheckNoGlError("glTexImage2D");
320 return texture_id;
323 void UploadTexture(GLuint texture_id,
324 const gfx::Size& size,
325 const std::vector<uint8>& pixels,
326 GLenum format,
327 const bool subimage) {
328 if (subimage) {
329 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.width(), size.height(),
330 format, GL_UNSIGNED_BYTE, &pixels[0]);
331 CheckNoGlError("glTexSubImage2D");
332 } else {
333 glTexImage2D(GL_TEXTURE_2D, 0, GLFormatToInternalFormat(format),
334 size.width(), size.height(), 0, format, GL_UNSIGNED_BYTE,
335 &pixels[0]);
336 CheckNoGlError("glTexImage2D");
340 // Upload and draw on the offscren surface.
341 // Return a list of pair. Each pair describe a gl operation and the wall
342 // time elapsed in milliseconds.
343 std::vector<Measurement> UploadAndDraw(GLuint texture_id,
344 const gfx::Size& size,
345 const std::vector<uint8>& pixels,
346 const GLenum format,
347 const bool subimage) {
348 MeasurementTimers tex_timers(gpu_timing_client_.get());
349 UploadTexture(texture_id, size, pixels, format, subimage);
350 tex_timers.Record();
352 MeasurementTimers first_draw_timers(gpu_timing_client_.get());
353 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
354 first_draw_timers.Record();
356 MeasurementTimers draw_timers(gpu_timing_client_.get());
357 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
358 draw_timers.Record();
360 MeasurementTimers finish_timers(gpu_timing_client_.get());
361 glFinish();
362 CheckNoGlError("glFinish");
363 finish_timers.Record();
365 std::vector<uint8> pixels_rendered(size.GetArea() * 4);
366 glReadPixels(0, 0, size.width(), size.height(), GL_RGBA, GL_UNSIGNED_BYTE,
367 &pixels_rendered[0]);
368 CheckNoGlError("glReadPixels");
369 EXPECT_TRUE(
370 CompareBufferToRGBABuffer(format, size, pixels, pixels_rendered))
371 << "Format is: " << gfx::GLEnums::GetStringEnum(format);
373 std::vector<Measurement> measurements;
374 bool gpu_timer_errors =
375 gpu_timing_client_->IsAvailable() &&
376 gpu_timing_client_->CheckAndResetTimerErrors();
377 if (!gpu_timer_errors) {
378 measurements.push_back(tex_timers.GetAsMeasurement(
379 subimage ? "texsubimage2d" : "teximage2d"));
380 measurements.push_back(
381 first_draw_timers.GetAsMeasurement("firstdrawarrays"));
382 measurements.push_back(draw_timers.GetAsMeasurement("drawarrays"));
383 measurements.push_back(finish_timers.GetAsMeasurement("finish"));
385 return measurements;
388 void RunUploadAndDrawMultipleTimes(const gfx::Size& size,
389 const GLenum format,
390 const bool subimage) {
391 std::vector<uint8> pixels;
392 base::SmallMap<std::map<std::string, Measurement>>
393 aggregates; // indexed by name
394 int successful_runs = 0;
395 GLuint texture_id = CreateGLTexture(format, size, subimage);
396 for (int i = 0; i < kUploadPerfWarmupRuns + kUploadPerfTestRuns; ++i) {
397 GenerateTextureData(size, GLFormatBytePerPixel(format), i + 1, &pixels);
398 auto run = UploadAndDraw(texture_id, size, pixels, format, subimage);
399 if (i < kUploadPerfWarmupRuns || !run.size()) {
400 continue;
402 successful_runs++;
403 for (const Measurement& measurement : run) {
404 auto& aggregate = aggregates[measurement.name];
405 aggregate.name = measurement.name;
406 aggregate.Increment(measurement);
409 glDeleteTextures(1, &texture_id);
411 std::string graph_name = base::StringPrintf(
412 "%d_%s", size.width(), gfx::GLEnums::GetStringEnum(format).c_str());
413 if (subimage) {
414 graph_name += "_sub";
417 if (successful_runs) {
418 for (const auto& entry : aggregates) {
419 const auto m = entry.second.Divide(successful_runs);
420 m.PrintResult(graph_name);
423 perf_test::PrintResult("sample_runs", "", graph_name,
424 static_cast<size_t>(successful_runs), "laps", true);
427 const gfx::Size fbo_size_; // for the fbo
428 scoped_refptr<gfx::GLContext> gl_context_;
429 scoped_refptr<gfx::GLSurface> surface_;
430 scoped_refptr<gfx::GPUTimingClient> gpu_timing_client_;
432 GLuint color_texture_ = 0;
433 GLuint framebuffer_object_ = 0;
434 GLuint vertex_shader_ = 0;
435 GLuint fragment_shader_ = 0;
436 GLuint program_object_ = 0;
437 GLint sampler_location_ = -1;
438 GLint translation_location_ = -1;
439 GLuint vertex_buffer_ = 0;
441 bool has_texture_storage_ = false;
444 // Perf test that generates, uploads and draws a texture on a surface repeatedly
445 // and prints out aggregated measurements for all the runs.
446 TEST_F(TextureUploadPerfTest, upload) {
447 int sizes[] = {21, 128, 256, 512, 1024};
448 std::vector<GLenum> formats;
449 formats.push_back(GL_RGBA);
451 if (!gl_context_->GetVersionInfo()->is_es3) {
452 // Used by default for ResourceProvider::yuv_resource_format_.
453 formats.push_back(GL_LUMINANCE);
456 ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
457 const bool has_texture_rg = gl_context_->GetVersionInfo()->is_es3 ||
458 gl_context_->HasExtension("GL_EXT_texture_rg") ||
459 gl_context_->HasExtension("GL_ARB_texture_rg");
461 if (has_texture_rg) {
462 // Used as ResourceProvider::yuv_resource_format_ if
463 // {ARB,EXT}_texture_rg are available.
464 formats.push_back(GL_RED);
467 for (int side : sizes) {
468 ASSERT_GE(fbo_size_.width(), side);
469 ASSERT_GE(fbo_size_.height(), side);
470 gfx::Size size(side, side);
471 GenerateVertexBuffer(size);
472 for (GLenum format : formats) {
473 RunUploadAndDrawMultipleTimes(size, format, true); // use glTexSubImage2D
474 RunUploadAndDrawMultipleTimes(size, format, false); // use glTexImage2D
479 // Perf test to check if the driver is doing texture renaming.
480 // This test creates one GL texture_id and four different images. For
481 // every image it uploads it using texture_id and it draws multiple
482 // times. The cpu/wall time and the gpu time for all the uploads and
483 // draws, but before glFinish, is computed and is printed out at the end as
484 // "upload_and_draw". If the gpu time is >> than the cpu/wall time we expect the
485 // driver to do texture renaming: this means that while the gpu is drawing using
486 // texture_id it didn't block cpu side the texture upload using the same
487 // texture_id.
488 TEST_F(TextureUploadPerfTest, renaming) {
489 gfx::Size texture_size(fbo_size_.width() / 2, fbo_size_.height() / 2);
491 std::vector<uint8> pixels[4];
492 for (int i = 0; i < 4; ++i) {
493 GenerateTextureData(texture_size, 4, i + 1, &pixels[i]);
496 ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
497 GenerateVertexBuffer(texture_size);
499 gfx::Vector2dF positions[] = {gfx::Vector2dF(0.f, 0.f),
500 gfx::Vector2dF(1.f, 0.f),
501 gfx::Vector2dF(0.f, 1.f),
502 gfx::Vector2dF(1.f, 1.f)};
503 GLuint texture_id = CreateGLTexture(GL_RGBA, texture_size, true);
505 MeasurementTimers upload_and_draw_timers(gpu_timing_client_.get());
507 for (int i = 0; i < 4; ++i) {
508 UploadTexture(texture_id, texture_size, pixels[i % 4], GL_RGBA, true);
509 DCHECK_NE(-1, translation_location_);
510 glUniform2f(translation_location_, positions[i % 4].x(),
511 positions[i % 4].y());
512 // Draw the same quad multiple times to make sure that the time spent on the
513 // gpu is more than the cpu time.
514 for (int draw = 0; draw < 128; ++draw) {
515 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
519 upload_and_draw_timers.Record();
520 MeasurementTimers finish_timers(gpu_timing_client_.get());
521 glFinish();
522 CheckNoGlError("glFinish");
523 finish_timers.Record();
525 glDeleteTextures(1, &texture_id);
527 for (int i = 0; i < 4; ++i) {
528 std::vector<uint8> pixels_rendered(texture_size.GetArea() * 4);
529 glReadPixels(texture_size.width() * positions[i].x(),
530 texture_size.height() * positions[i].y(), texture_size.width(),
531 texture_size.height(), GL_RGBA, GL_UNSIGNED_BYTE,
532 &pixels_rendered[0]);
533 CheckNoGlError("glReadPixels");
534 ASSERT_EQ(pixels[i].size(), pixels_rendered.size());
535 EXPECT_EQ(pixels[i], pixels_rendered);
538 bool gpu_timer_errors = gpu_timing_client_->IsAvailable() &&
539 gpu_timing_client_->CheckAndResetTimerErrors();
540 if (!gpu_timer_errors) {
541 upload_and_draw_timers.GetAsMeasurement("upload_and_draw")
542 .PrintResult("renaming");
543 finish_timers.GetAsMeasurement("finish").PrintResult("renaming");
547 } // namespace
548 } // namespace gpu