Don't preload rarely seen large images
[chromium-blink-merge.git] / gpu / perftests / texture_upload_perftest.cc
bloba70e0f1d2ac5b523c4dce3763ccc4fe6246b347b
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <algorithm>
6 #include <vector>
8 #include "base/containers/small_map.h"
9 #include "base/logging.h"
10 #include "base/memory/ref_counted.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/strings/stringprintf.h"
13 #include "gpu/perftests/measurements.h"
14 #include "testing/gmock/include/gmock/gmock.h"
15 #include "testing/gtest/include/gtest/gtest.h"
16 #include "testing/perf/perf_test.h"
17 #include "ui/gfx/geometry/size.h"
18 #include "ui/gfx/geometry/vector2d_f.h"
19 #include "ui/gl/gl_bindings.h"
20 #include "ui/gl/gl_context.h"
21 #include "ui/gl/gl_enums.h"
22 #include "ui/gl/gl_surface.h"
23 #include "ui/gl/gl_version_info.h"
24 #include "ui/gl/gpu_timing.h"
25 #include "ui/gl/scoped_make_current.h"
27 #if defined(USE_OZONE)
28 #include "base/message_loop/message_loop.h"
29 #endif
31 namespace gpu {
32 namespace {
// Number of initial upload/draw iterations whose measurements are discarded.
const int kUploadPerfWarmupRuns = 5;
// Number of upload/draw iterations executed after the warm-up iterations;
// only these can contribute to the reported aggregates.
const int kUploadPerfTestRuns = 30;
// Stringifies its argument so that GLSL source can be written inline.
#define SHADER(Src) #Src

// clang-format off
// Vertex shader: offsets the incoming position by the |translation| uniform
// and forwards the texture coordinate to the fragment stage.
const char kVertexShader[] =
SHADER(
  uniform vec2 translation;
  attribute vec2 a_position;
  attribute vec2 a_texCoord;
  varying vec2 v_texCoord;
  void main() {
    gl_Position = vec4(
        translation.x + a_position.x, translation.y + a_position.y, 0.0, 1.0);
    v_texCoord = a_texCoord;
  }
);
// Default float precision statement, prepended to the fragment shader when
// running on GLES.
const char kShaderDefaultFloatPrecision[] =
SHADER(
  precision mediump float;
);
// Fragment shader: outputs the texel sampled from |a_texture|.
const char kFragmentShader[] =
SHADER(
  uniform sampler2D a_texture;
  varying vec2 v_texCoord;
  void main() {
    gl_FragColor = texture2D(a_texture, v_texCoord);
  }
);
// clang-format on
66 void CheckNoGlError(const std::string& msg) {
67 CHECK_EQ(static_cast<GLenum>(GL_NO_ERROR), glGetError()) << " " << msg;
70 // Utility function to compile a shader from a string.
71 GLuint LoadShader(const GLenum type, const char* const src) {
72 GLuint shader = 0;
73 shader = glCreateShader(type);
74 CHECK_NE(0u, shader);
75 glShaderSource(shader, 1, &src, NULL);
76 glCompileShader(shader);
78 GLint compiled = 0;
79 glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
80 if (compiled == 0) {
81 GLint len = 0;
82 glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &len);
83 if (len > 1) {
84 scoped_ptr<char[]> error_log(new char[len]);
85 glGetShaderInfoLog(shader, len, NULL, error_log.get());
86 LOG(ERROR) << "Error compiling shader: " << error_log.get();
89 CHECK_NE(0, compiled);
90 return shader;
93 int GLFormatBytePerPixel(GLenum format) {
94 DCHECK(format == GL_RGBA || format == GL_LUMINANCE || format == GL_RED_EXT);
95 return format == GL_RGBA ? 4 : 1;
98 GLenum GLFormatToInternalFormat(GLenum format) {
99 return format == GL_RED ? GL_R8 : format;
102 GLenum GLFormatToStorageFormat(GLenum format) {
103 switch (format) {
104 case GL_RGBA:
105 return GL_RGBA8;
106 case GL_LUMINANCE:
107 return GL_LUMINANCE8;
108 case GL_RED:
109 return GL_R8;
110 default:
111 NOTREACHED();
113 return 0;
116 void GenerateTextureData(const gfx::Size& size,
117 int bytes_per_pixel,
118 const int seed,
119 std::vector<uint8>* const pixels) {
120 // Row bytes has to be multiple of 4 (GL_PACK_ALIGNMENT defaults to 4).
121 int stride = ((size.width() * bytes_per_pixel) + 3) & ~0x3;
122 pixels->resize(size.height() * stride);
123 for (int y = 0; y < size.height(); ++y) {
124 for (int x = 0; x < size.width(); ++x) {
125 for (int channel = 0; channel < bytes_per_pixel; ++channel) {
126 int index = y * stride + x * bytes_per_pixel;
127 pixels->at(index) = (index + (seed << 2)) % (0x20 << channel);
133 // Compare a buffer containing pixels in a specified format to GL_RGBA buffer
134 // where the former buffer have been uploaded as a texture and drawn on the
135 // RGBA buffer.
136 bool CompareBufferToRGBABuffer(GLenum format,
137 const gfx::Size& size,
138 const std::vector<uint8>& pixels,
139 const std::vector<uint8>& rgba) {
140 int bytes_per_pixel = GLFormatBytePerPixel(format);
141 int pixels_stride = ((size.width() * bytes_per_pixel) + 3) & ~0x3;
142 int rgba_stride = size.width() * GLFormatBytePerPixel(GL_RGBA);
143 for (int y = 0; y < size.height(); ++y) {
144 for (int x = 0; x < size.width(); ++x) {
145 int rgba_index = y * rgba_stride + x * GLFormatBytePerPixel(GL_RGBA);
146 int pixels_index = y * pixels_stride + x * bytes_per_pixel;
147 uint8 expected[4] = {0};
148 switch (format) {
149 case GL_LUMINANCE: // (L_t, L_t, L_t, 1)
150 expected[1] = pixels[pixels_index];
151 expected[2] = pixels[pixels_index];
152 case GL_RED: // (R_t, 0, 0, 1)
153 expected[0] = pixels[pixels_index];
154 expected[3] = 255;
155 break;
156 case GL_RGBA: // (R_t, G_t, B_t, A_t)
157 memcpy(expected, &pixels[pixels_index], 4);
158 break;
159 default:
160 NOTREACHED();
162 if (memcmp(&rgba[rgba_index], expected, 4)) {
163 return false;
167 return true;
170 // PerfTest to check costs of texture upload at different stages
171 // on different platforms.
172 class TextureUploadPerfTest : public testing::Test {
173 public:
174 TextureUploadPerfTest() : fbo_size_(1024, 1024) {}
176 // Overridden from testing::Test
177 void SetUp() override {
178 #if defined(USE_OZONE)
179 // On Ozone, the backend initializes the event system using a UI
180 // thread.
181 base::MessageLoopForUI main_loop;
182 #endif
183 static bool gl_initialized = gfx::GLSurface::InitializeOneOff();
184 DCHECK(gl_initialized);
185 // Initialize an offscreen surface and a gl context.
186 surface_ = gfx::GLSurface::CreateOffscreenGLSurface(gfx::Size());
187 gl_context_ = gfx::GLContext::CreateGLContext(NULL, // share_group
188 surface_.get(),
189 gfx::PreferIntegratedGpu);
190 ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
191 glGenTextures(1, &color_texture_);
192 glBindTexture(GL_TEXTURE_2D, color_texture_);
193 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
194 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
195 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
196 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
197 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, fbo_size_.width(),
198 fbo_size_.height(), 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
200 glGenFramebuffersEXT(1, &framebuffer_object_);
201 glBindFramebufferEXT(GL_FRAMEBUFFER, framebuffer_object_);
203 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
204 GL_TEXTURE_2D, color_texture_, 0);
205 DCHECK_EQ(static_cast<GLenum>(GL_FRAMEBUFFER_COMPLETE),
206 glCheckFramebufferStatusEXT(GL_FRAMEBUFFER));
208 glViewport(0, 0, fbo_size_.width(), fbo_size_.height());
209 gpu_timing_client_ = gl_context_->CreateGPUTimingClient();
211 if (gpu_timing_client_->IsAvailable()) {
212 LOG(INFO) << "Gpu timing initialized with timer type: "
213 << gpu_timing_client_->GetTimerTypeName();
214 } else {
215 LOG(WARNING) << "Can't initialize gpu timing";
217 // Prepare a simple program and a vertex buffer that will be
218 // used to draw a quad on the offscreen surface.
219 vertex_shader_ = LoadShader(GL_VERTEX_SHADER, kVertexShader);
221 bool is_gles = gfx::GetGLImplementation() == gfx::kGLImplementationEGLGLES2;
222 fragment_shader_ = LoadShader(
223 GL_FRAGMENT_SHADER,
224 base::StringPrintf("%s%s", is_gles ? kShaderDefaultFloatPrecision : "",
225 kFragmentShader).c_str());
226 program_object_ = glCreateProgram();
227 CHECK_NE(0u, program_object_);
229 glAttachShader(program_object_, vertex_shader_);
230 glAttachShader(program_object_, fragment_shader_);
231 glBindAttribLocation(program_object_, 0, "a_position");
232 glBindAttribLocation(program_object_, 1, "a_texCoord");
233 glLinkProgram(program_object_);
235 GLint linked = -1;
236 glGetProgramiv(program_object_, GL_LINK_STATUS, &linked);
237 CHECK_NE(0, linked);
238 glUseProgram(program_object_);
239 glUniform1i(sampler_location_, 0);
240 translation_location_ =
241 glGetUniformLocation(program_object_, "translation");
242 DCHECK_NE(-1, translation_location_);
243 glUniform2f(translation_location_, 0.0f, 0.0f);
245 sampler_location_ = glGetUniformLocation(program_object_, "a_texture");
246 CHECK_NE(-1, sampler_location_);
248 glGenBuffersARB(1, &vertex_buffer_);
249 CHECK_NE(0u, vertex_buffer_);
250 DCHECK_NE(0u, vertex_buffer_);
251 glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
252 glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4, 0);
253 glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4,
254 reinterpret_cast<void*>(sizeof(GLfloat) * 2));
255 glEnableVertexAttribArray(0);
256 glEnableVertexAttribArray(1);
257 CheckNoGlError("glEnableVertexAttribArray");
259 has_texture_storage_ =
260 gl_context_->GetVersionInfo()->is_es3 ||
261 gl_context_->HasExtension("GL_EXT_texture_storage") ||
262 gl_context_->HasExtension("GL_ARB_texture_storage");
265 void GenerateVertexBuffer(const gfx::Size& size) {
266 DCHECK_NE(0u, vertex_buffer_);
267 glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
268 // right and top are in clipspace
269 float right = -1.f + 2.f * size.width() / fbo_size_.width();
270 float top = -1.f + 2.f * size.height() / fbo_size_.height();
271 // Four vertexes, one per line. Each vertex has two components per
272 // position and two per texcoord.
273 // It represents a quad formed by two triangles if interpreted
274 // as a tristrip.
276 // clang-format off
277 GLfloat data[16] = {
278 -1.f, -1.f, 0.f, 0.f,
279 right, -1.f, 1.f, 0.f,
280 -1.f, top, 0.f, 1.f,
281 right, top, 1.f, 1.f};
282 // clang-format on
283 glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
284 CheckNoGlError("glBufferData");
287 void TearDown() override {
288 ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
289 glDeleteProgram(program_object_);
290 glDeleteShader(vertex_shader_);
291 glDeleteShader(fragment_shader_);
292 glDeleteBuffersARB(1, &vertex_buffer_);
294 glBindFramebufferEXT(GL_FRAMEBUFFER, 0);
295 glDeleteFramebuffersEXT(1, &framebuffer_object_);
296 glDeleteTextures(1, &color_texture_);
297 CheckNoGlError("glDeleteTextures");
299 gpu_timing_client_ = nullptr;
300 gl_context_ = nullptr;
301 surface_ = nullptr;
304 protected:
305 GLuint CreateGLTexture(const GLenum format,
306 const gfx::Size& size,
307 const bool specify_storage) {
308 GLuint texture_id = 0;
309 glActiveTexture(GL_TEXTURE0);
310 glGenTextures(1, &texture_id);
311 glBindTexture(GL_TEXTURE_2D, texture_id);
312 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
313 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
314 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
315 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
316 if (specify_storage) {
317 if (has_texture_storage_) {
318 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GLFormatToStorageFormat(format),
319 size.width(), size.height());
320 CheckNoGlError("glTexStorage2DEXT");
321 } else {
322 glTexImage2D(GL_TEXTURE_2D, 0, GLFormatToInternalFormat(format),
323 size.width(), size.height(), 0, format, GL_UNSIGNED_BYTE,
324 nullptr);
325 CheckNoGlError("glTexImage2D");
328 return texture_id;
331 void UploadTexture(GLuint texture_id,
332 const gfx::Size& size,
333 const std::vector<uint8>& pixels,
334 GLenum format,
335 const bool subimage) {
336 if (subimage) {
337 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.width(), size.height(),
338 format, GL_UNSIGNED_BYTE, &pixels[0]);
339 CheckNoGlError("glTexSubImage2D");
340 } else {
341 glTexImage2D(GL_TEXTURE_2D, 0, GLFormatToInternalFormat(format),
342 size.width(), size.height(), 0, format, GL_UNSIGNED_BYTE,
343 &pixels[0]);
344 CheckNoGlError("glTexImage2D");
348 // Upload and draw on the offscren surface.
349 // Return a list of pair. Each pair describe a gl operation and the wall
350 // time elapsed in milliseconds.
351 std::vector<Measurement> UploadAndDraw(GLuint texture_id,
352 const gfx::Size& size,
353 const std::vector<uint8>& pixels,
354 const GLenum format,
355 const bool subimage) {
356 MeasurementTimers tex_timers(gpu_timing_client_.get());
357 UploadTexture(texture_id, size, pixels, format, subimage);
358 tex_timers.Record();
360 MeasurementTimers first_draw_timers(gpu_timing_client_.get());
361 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
362 first_draw_timers.Record();
364 MeasurementTimers draw_timers(gpu_timing_client_.get());
365 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
366 draw_timers.Record();
368 MeasurementTimers finish_timers(gpu_timing_client_.get());
369 glFinish();
370 CheckNoGlError("glFinish");
371 finish_timers.Record();
373 std::vector<uint8> pixels_rendered(size.GetArea() * 4);
374 glReadPixels(0, 0, size.width(), size.height(), GL_RGBA, GL_UNSIGNED_BYTE,
375 &pixels_rendered[0]);
376 CheckNoGlError("glReadPixels");
377 EXPECT_TRUE(
378 CompareBufferToRGBABuffer(format, size, pixels, pixels_rendered))
379 << "Format is: " << gfx::GLEnums::GetStringEnum(format);
381 std::vector<Measurement> measurements;
382 bool gpu_timer_errors =
383 gpu_timing_client_->IsAvailable() &&
384 gpu_timing_client_->CheckAndResetTimerErrors();
385 if (!gpu_timer_errors) {
386 measurements.push_back(tex_timers.GetAsMeasurement(
387 subimage ? "texsubimage2d" : "teximage2d"));
388 measurements.push_back(
389 first_draw_timers.GetAsMeasurement("firstdrawarrays"));
390 measurements.push_back(draw_timers.GetAsMeasurement("drawarrays"));
391 measurements.push_back(finish_timers.GetAsMeasurement("finish"));
393 return measurements;
396 void RunUploadAndDrawMultipleTimes(const gfx::Size& size,
397 const GLenum format,
398 const bool subimage) {
399 std::vector<uint8> pixels;
400 base::SmallMap<std::map<std::string, Measurement>>
401 aggregates; // indexed by name
402 int successful_runs = 0;
403 GLuint texture_id = CreateGLTexture(format, size, subimage);
404 for (int i = 0; i < kUploadPerfWarmupRuns + kUploadPerfTestRuns; ++i) {
405 GenerateTextureData(size, GLFormatBytePerPixel(format), i + 1, &pixels);
406 auto run = UploadAndDraw(texture_id, size, pixels, format, subimage);
407 if (i < kUploadPerfWarmupRuns || !run.size()) {
408 continue;
410 successful_runs++;
411 for (const Measurement& measurement : run) {
412 auto& aggregate = aggregates[measurement.name];
413 aggregate.name = measurement.name;
414 aggregate.Increment(measurement);
417 glDeleteTextures(1, &texture_id);
419 std::string graph_name = base::StringPrintf(
420 "%d_%s", size.width(), gfx::GLEnums::GetStringEnum(format).c_str());
421 if (subimage) {
422 graph_name += "_sub";
425 if (successful_runs) {
426 for (const auto& entry : aggregates) {
427 const auto m = entry.second.Divide(successful_runs);
428 m.PrintResult(graph_name);
431 perf_test::PrintResult("sample_runs", "", graph_name,
432 static_cast<size_t>(successful_runs), "laps", true);
435 const gfx::Size fbo_size_; // for the fbo
436 scoped_refptr<gfx::GLContext> gl_context_;
437 scoped_refptr<gfx::GLSurface> surface_;
438 scoped_refptr<gfx::GPUTimingClient> gpu_timing_client_;
440 GLuint color_texture_ = 0;
441 GLuint framebuffer_object_ = 0;
442 GLuint vertex_shader_ = 0;
443 GLuint fragment_shader_ = 0;
444 GLuint program_object_ = 0;
445 GLint sampler_location_ = -1;
446 GLint translation_location_ = -1;
447 GLuint vertex_buffer_ = 0;
449 bool has_texture_storage_ = false;
452 // Perf test that generates, uploads and draws a texture on a surface repeatedly
453 // and prints out aggregated measurements for all the runs.
454 TEST_F(TextureUploadPerfTest, upload) {
455 int sizes[] = {21, 128, 256, 512, 1024};
456 std::vector<GLenum> formats;
457 formats.push_back(GL_RGBA);
459 if (!gl_context_->GetVersionInfo()->is_es3) {
460 // Used by default for ResourceProvider::yuv_resource_format_.
461 formats.push_back(GL_LUMINANCE);
464 ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
465 const bool has_texture_rg = gl_context_->GetVersionInfo()->is_es3 ||
466 gl_context_->HasExtension("GL_EXT_texture_rg") ||
467 gl_context_->HasExtension("GL_ARB_texture_rg");
469 if (has_texture_rg) {
470 // Used as ResourceProvider::yuv_resource_format_ if
471 // {ARB,EXT}_texture_rg are available.
472 formats.push_back(GL_RED);
475 for (int side : sizes) {
476 ASSERT_GE(fbo_size_.width(), side);
477 ASSERT_GE(fbo_size_.height(), side);
478 gfx::Size size(side, side);
479 GenerateVertexBuffer(size);
480 for (GLenum format : formats) {
481 RunUploadAndDrawMultipleTimes(size, format, true); // use glTexSubImage2D
482 RunUploadAndDrawMultipleTimes(size, format, false); // use glTexImage2D
487 // Perf test to check if the driver is doing texture renaming.
488 // This test creates one GL texture_id and four different images. For
489 // every image it uploads it using texture_id and it draws multiple
490 // times. The cpu/wall time and the gpu time for all the uploads and
491 // draws, but before glFinish, is computed and is printed out at the end as
492 // "upload_and_draw". If the gpu time is >> than the cpu/wall time we expect the
493 // driver to do texture renaming: this means that while the gpu is drawing using
494 // texture_id it didn't block cpu side the texture upload using the same
495 // texture_id.
496 TEST_F(TextureUploadPerfTest, renaming) {
497 gfx::Size texture_size(fbo_size_.width() / 2, fbo_size_.height() / 2);
499 std::vector<uint8> pixels[4];
500 for (int i = 0; i < 4; ++i) {
501 GenerateTextureData(texture_size, 4, i + 1, &pixels[i]);
504 ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
505 GenerateVertexBuffer(texture_size);
507 gfx::Vector2dF positions[] = {gfx::Vector2dF(0.f, 0.f),
508 gfx::Vector2dF(1.f, 0.f),
509 gfx::Vector2dF(0.f, 1.f),
510 gfx::Vector2dF(1.f, 1.f)};
511 GLuint texture_id = CreateGLTexture(GL_RGBA, texture_size, true);
513 MeasurementTimers upload_and_draw_timers(gpu_timing_client_.get());
515 for (int i = 0; i < 4; ++i) {
516 UploadTexture(texture_id, texture_size, pixels[i % 4], GL_RGBA, true);
517 DCHECK_NE(-1, translation_location_);
518 glUniform2f(translation_location_, positions[i % 4].x(),
519 positions[i % 4].y());
520 // Draw the same quad multiple times to make sure that the time spent on the
521 // gpu is more than the cpu time.
522 for (int draw = 0; draw < 128; ++draw) {
523 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
527 upload_and_draw_timers.Record();
528 MeasurementTimers finish_timers(gpu_timing_client_.get());
529 glFinish();
530 CheckNoGlError("glFinish");
531 finish_timers.Record();
533 glDeleteTextures(1, &texture_id);
535 for (int i = 0; i < 4; ++i) {
536 std::vector<uint8> pixels_rendered(texture_size.GetArea() * 4);
537 glReadPixels(texture_size.width() * positions[i].x(),
538 texture_size.height() * positions[i].y(), texture_size.width(),
539 texture_size.height(), GL_RGBA, GL_UNSIGNED_BYTE,
540 &pixels_rendered[0]);
541 CheckNoGlError("glReadPixels");
542 ASSERT_EQ(pixels[i].size(), pixels_rendered.size());
543 EXPECT_EQ(pixels[i], pixels_rendered);
546 bool gpu_timer_errors = gpu_timing_client_->IsAvailable() &&
547 gpu_timing_client_->CheckAndResetTimerErrors();
548 if (!gpu_timer_errors) {
549 upload_and_draw_timers.GetAsMeasurement("upload_and_draw")
550 .PrintResult("renaming");
551 finish_timers.GetAsMeasurement("finish").PrintResult("renaming");
555 } // namespace
556 } // namespace gpu