From 1fa4219648d62b18de8729b53abefa2ab0c6dd94 Mon Sep 17 00:00:00 2001
From: dyen <dyen@chromium.org>
Date: Wed, 17 Jun 2015 14:25:39 -0700
Subject: [PATCH] Elapsed timer queries are virtual and support multiple
 queries.

GPUTiming now handles the virtualization of elapsed timer queries,
which allows us to support multiple queries across different uses. For
example, we can now do internal timings for performance reasons, use
the GPUTracer, and support WebGL-related query calls.


Along with the virtualization of timer queries, the elapsed timer mode
in GPUTiming also uses GL_TIME_ELAPSED_EXT to estimate the time stamps
that GL_ARB_timer_query and GL_EXT_disjoint_timer_query would normally
return. This allows us to work around drivers whose time stamp queries
do not return reasonable values.

Most of the code here is due to the fact that GL_TIME_ELAPSED_EXT has a
limitation where only one query can be active at a time in a GL context.
What I have basically done is split up queries into multiple query chunks
and add them up at the end. Since queries are correct relative to one
another when they are stacked on top of each other, the GPU timer can
use the start and end times to figure out the delta.

R=sievers@chromium.org, vmiura@chromium.org
BUG=453965, 345227

Review URL: https://codereview.chromium.org/1152153006

Cr-Commit-Position: refs/heads/master@{#334922}
---
 gpu/command_buffer/service/gles2_cmd_decoder.cc   |   7 +
 gpu/command_buffer/service/gpu_tracer.cc          |   7 +-
 gpu/command_buffer/service/gpu_tracer_unittest.cc |   6 +-
 gpu/config/gpu_driver_bug_list_json.cc            |  10 +-
 gpu/config/gpu_driver_bug_workaround_type.h       |   2 +
 gpu/perftests/measurements.cc                     |   2 +-
 gpu/perftests/texture_upload_perftest.cc          |   1 -
 ui/gl/gl_context.cc                               |   2 +-
 ui/gl/gpu_timing.cc                               | 606 +++++++++++++++++-----
 ui/gl/gpu_timing.h                                |  64 +--
 ui/gl/gpu_timing_fake.cc                          |  29 +-
 ui/gl/gpu_timing_unittest.cc                      |  19 +-
 12 files changed, 558 insertions(+), 197 deletions(-)
diff --git a/gpu/command_buffer/service/gles2_cmd_decoder.cc b/gpu/command_buffer/service/gles2_cmd_decoder.cc
index f932e84308be..6806f126eecf 100644
--- a/gpu/command_buffer/service/gles2_cmd_decoder.cc
+++ b/gpu/command_buffer/service/gles2_cmd_decoder.cc
@@ -66,6 +66,7 @@
 #include "ui/gl/gl_image.h"
 #include "ui/gl/gl_implementation.h"
 #include "ui/gl/gl_surface.h"
+#include "ui/gl/gpu_timing.h"
 
 #if defined(OS_MACOSX)
 #include <IOSurface/IOSurfaceAPI.h>
@@ -2615,6 +2616,12 @@ bool GLES2DecoderImpl::Initialize(
   // Create GPU Tracer for timing values.
   gpu_tracer_.reset(new GPUTracer(this));
 
+  if (feature_info_->workarounds().disable_timestamp_queries) {
+    // Forcing time elapsed query for any GPU Timing Client forces it for all
+    // clients in the context.
+    GetGLContext()->CreateGPUTimingClient()->ForceTimeElapsedQuery();
+  }
+
   // Save the loseContextWhenOutOfMemory context creation attribute.
   lose_context_when_out_of_memory_ =
       attrib_parser.lose_context_when_out_of_memory;
diff --git a/gpu/command_buffer/service/gpu_tracer.cc b/gpu/command_buffer/service/gpu_tracer.cc
index 2f01916f13f1..b64639d30536 100644
--- a/gpu/command_buffer/service/gpu_tracer.cc
+++ b/gpu/command_buffer/service/gpu_tracer.cc
@@ -133,11 +133,8 @@ GPUTrace::GPUTrace(scoped_refptr<Outputter> outputter,
       outputter_(outputter),
       service_enabled_(tracing_service),
       device_enabled_(tracing_device) {
-  if (tracing_device &&
-      gpu_timing_client->IsAvailable() &&
-      gpu_timing_client->IsTimerOffsetAvailable()) {
-    gpu_timer_ = gpu_timing_client->CreateGPUTimer();
-  }
+  if (tracing_device && gpu_timing_client->IsAvailable())
+    gpu_timer_ = gpu_timing_client->CreateGPUTimer(false);
 }
 
 GPUTrace::~GPUTrace() {
diff --git a/gpu/command_buffer/service/gpu_tracer_unittest.cc b/gpu/command_buffer/service/gpu_tracer_unittest.cc
index 9e30e1159516..70644b700492 100644
--- a/gpu/command_buffer/service/gpu_tracer_unittest.cc
+++ b/gpu/command_buffer/service/gpu_tracer_unittest.cc
@@ -189,8 +189,7 @@ class BaseGpuTest : public GpuServiceTest {
     if (tracing_service)
       ExpectOutputterBeginMocks(outputter, source, category, name);
     const bool valid_timer = tracing_device &&
-                             gpu_timing_client_->IsAvailable() &&
-                             GetTimerType() != gfx::GPUTiming::kTimerTypeEXT;
+                             gpu_timing_client_->IsAvailable();
     ExpectOutputterEndMocks(outputter, source, category, name,
                             expect_start_time, expect_end_time,
                             tracing_service, valid_timer);
@@ -402,8 +401,7 @@ class BaseGpuTracerTest : public BaseGpuTest {
       std::string source_category = category_name + num_char;
       std::string source_trace_name = trace_name + num_char;
 
-      const bool valid_timer = gpu_timing_client_->IsAvailable() &&
-                               GetTimerType() != gfx::GPUTiming::kTimerTypeEXT;
+      const bool valid_timer = gpu_timing_client_->IsAvailable();
 
       const GpuTracerSource source = static_cast<GpuTracerSource>(i);
       ExpectOutputterEndMocks(outputter_ref_.get(), source, source_category,
diff --git a/gpu/config/gpu_driver_bug_list_json.cc b/gpu/config/gpu_driver_bug_list_json.cc
index 987138eeb957..80018956b382 100644
--- a/gpu/config/gpu_driver_bug_list_json.cc
+++ b/gpu/config/gpu_driver_bug_list_json.cc
@@ -1311,7 +1311,7 @@ LONG_STRING_CONST(
     {
       "id": 112,
       "cr_bugs": [477514],
-      "description": "EXT_disjoint_timer_query fails after 2 queries on adreno 330 in lollypop",
+      "description": "EXT_disjoint_timer_query fails after 2 queries on adreno 3xx in lollypop",
       "os": {
         "type": "android",
         "version": {
@@ -1320,15 +1320,15 @@ LONG_STRING_CONST(
         }
       },
       "gl_vendor": "Qualcomm.*",
-      "gl_renderer": ".*330",
-      "disabled_extensions": [
-        "GL_EXT_disjoint_timer_query"
+      "gl_renderer": "Adreno \\(TM\\) 3.*",
+      "features": [
+        "disable_timestamp_queries"
       ]
     },
     {
       "id": 113,
       "cr_bugs": [477514],
-      "description": "EXT_disjoint_timer_query fails after 256 queries on adreno 420",
+      "description": "EXT_disjoint_timer_query fails after 256 queries on adreno 4xx",
       "os": {
         "type": "android"
       },
diff --git a/gpu/config/gpu_driver_bug_workaround_type.h b/gpu/config/gpu_driver_bug_workaround_type.h
index c5c9b83f18f4..47f193ddb6cc 100644
--- a/gpu/config/gpu_driver_bug_workaround_type.h
+++ b/gpu/config/gpu_driver_bug_workaround_type.h
@@ -44,6 +44,8 @@
          disable_ns_cgl_surface_api)                         \
   GPU_OP(DISABLE_POST_SUB_BUFFERS_FOR_ONSCREEN_SURFACES,     \
          disable_post_sub_buffers_for_onscreen_surfaces)     \
+  GPU_OP(DISABLE_TIMESTAMP_QUERIES,                          \
+         disable_timestamp_queries)                          \
   GPU_OP(ETC1_POWER_OF_TWO_ONLY,                             \
          etc1_power_of_two_only)                             \
   GPU_OP(EXIT_ON_CONTEXT_LOST,                               \
diff --git a/gpu/perftests/measurements.cc b/gpu/perftests/measurements.cc
index 06a672ba3190..b2dac9133050 100644
--- a/gpu/perftests/measurements.cc
+++ b/gpu/perftests/measurements.cc
@@ -65,7 +65,7 @@ MeasurementTimers::MeasurementTimers(gfx::GPUTimingClient* gpu_timing_client)
   }
 
   if (gpu_timing_client->IsAvailable()) {
-    gpu_timer_ = gpu_timing_client->CreateGPUTimer();
+    gpu_timer_ = gpu_timing_client->CreateGPUTimer(true);
     gpu_timer_->Start();
   }
 }
diff --git a/gpu/perftests/texture_upload_perftest.cc b/gpu/perftests/texture_upload_perftest.cc
index 27444a4c432c..a70e0f1d2ac5 100644
--- a/gpu/perftests/texture_upload_perftest.cc
+++ b/gpu/perftests/texture_upload_perftest.cc
@@ -211,7 +211,6 @@ class TextureUploadPerfTest : public testing::Test {
     if (gpu_timing_client_->IsAvailable()) {
       LOG(INFO) << "Gpu timing initialized with timer type: "
                 << gpu_timing_client_->GetTimerTypeName();
-      gpu_timing_client_->InvalidateTimerOffset();
     } else {
       LOG(WARNING) << "Can't initialize gpu timing";
     }
diff --git a/ui/gl/gl_context.cc b/ui/gl/gl_context.cc
index 0b755a7910b8..1049bfdba3ab 100644
--- a/ui/gl/gl_context.cc
+++ b/ui/gl/gl_context.cc
@@ -259,7 +259,7 @@ GLContextReal::GLContextReal(GLShareGroup* share_group)
 
 scoped_refptr<gfx::GPUTimingClient> GLContextReal::CreateGPUTimingClient() {
   if (!gpu_timing_) {
-    gpu_timing_.reset(new gfx::GPUTiming(this));
+    gpu_timing_.reset(GPUTiming::CreateGPUTiming(this));
   }
   return gpu_timing_->CreateGPUTimingClient();
 }
diff --git a/ui/gl/gpu_timing.cc b/ui/gl/gpu_timing.cc
index dc720df58e03..65110b69cdad 100644
--- a/ui/gl/gpu_timing.cc
+++ b/ui/gl/gpu_timing.cc
@@ -11,29 +11,318 @@
 
 namespace gfx {
 
-GPUTiming::GPUTiming(GLContextReal* context) {
+int64_t NanoToMicro(uint64_t nano_seconds) {
+  const uint64_t up = nano_seconds + base::Time::kNanosecondsPerMicrosecond / 2;
+  return static_cast<int64_t>(up / base::Time::kNanosecondsPerMicrosecond);
+}
+
+class GPUTimingImpl : public GPUTiming {
+ public:
+   GPUTimingImpl(GLContextReal* context);
+  ~GPUTimingImpl() override;
+
+  void ForceTimeElapsedQuery() { force_time_elapsed_query_ = true; }
+  bool IsForceTimeElapsedQuery() { return force_time_elapsed_query_; }
+
+  GPUTiming::TimerType GetTimerType() const { return timer_type_; }
+
+  uint32_t GetDisjointCount();
+  int64 CalculateTimerOffset();
+
+  scoped_refptr<QueryResult> BeginElapsedTimeQuery();
+  void EndElapsedTimeQuery(scoped_refptr<QueryResult> result);
+
+  scoped_refptr<QueryResult> DoTimeStampQuery();
+
+  int64 GetCurrentCPUTime() {
+    return cpu_time_for_testing_.is_null()
+           ? (base::TraceTicks::Now() - base::TraceTicks()).InMicroseconds()
+           : cpu_time_for_testing_.Run();
+  }
+  void SetCpuTimeForTesting(const base::Callback<int64(void)>& cpu_time) {
+    cpu_time_for_testing_ = cpu_time;
+  }
+
+  void UpdateQueryResults();
+
+  int64_t GetMaxTimeStamp() { return max_time_stamp_; }
+  void UpdateMaxTimeStamp(int64_t value) {
+    max_time_stamp_ = std::max(max_time_stamp_, value);
+  }
+
+  uint32_t GetElapsedQueryCount() { return elapsed_query_count_; }
+  void IncElapsedQueryCount() { elapsed_query_count_++; }
+  void DecElapsedQueryCount() { elapsed_query_count_--; }
+
+  void SetLastElapsedQuery(scoped_refptr<TimeElapsedTimerQuery> query);
+  scoped_refptr<TimeElapsedTimerQuery> GetLastElapsedQuery();
+
+  void HandleBadQuery();
+  bool IsGoodQueryID(uint32_t query_id);
+
+ private:
+  scoped_refptr<GPUTimingClient> CreateGPUTimingClient() override;
+
+  base::Callback<int64(void)> cpu_time_for_testing_;
+  GPUTiming::TimerType timer_type_ = GPUTiming::kTimerTypeInvalid;
+  uint32_t disjoint_counter_ = 0;
+  int64 offset_ = 0;  // offset cache when timer_type_ == kTimerTypeARB
+  bool offset_valid_ = false;
+  bool force_time_elapsed_query_ = false;
+
+  uint32_t next_timer_query_id_ = 0;
+  uint32_t next_good_timer_query_id_ = 0; // identify bad ids for disjoints.
+  uint32_t query_disjoint_count_ = 0;
+
+  // Extra state tracking data for elapsed timer queries.
+  int64_t max_time_stamp_ = 0;
+  uint32_t elapsed_query_count_ = 0;
+  scoped_refptr<TimeElapsedTimerQuery> last_elapsed_query_;
+
+  std::deque<scoped_refptr<TimerQuery> > queries_;
+
+  DISALLOW_COPY_AND_ASSIGN(GPUTimingImpl);
+};
+
+class QueryResult : public base::RefCounted<QueryResult> {
+ public:
+  QueryResult() {}
+
+  bool IsAvailable() const { return available_; }
+  int64_t GetDelta() const { return end_value_ - start_value_; }
+  int64_t GetStartValue() const { return start_value_; }
+  int64_t GetEndValue() const { return end_value_; }
+
+  void SetStartValue(int64_t value) { start_value_ = value; }
+  void SetEndValue(int64_t value) { available_ = true; end_value_ = value; }
+
+ private:
+  friend class base::RefCounted<QueryResult>;
+  ~QueryResult() {}
+
+  bool available_ = false;
+  int64_t start_value_ = 0;
+  int64_t end_value_ = 0;
+
+  DISALLOW_COPY_AND_ASSIGN(QueryResult);
+};
+
+class TimerQuery : public base::RefCounted<TimerQuery> {
+ public:
+  TimerQuery(uint32_t next_id);
+  virtual void Destroy() = 0;
+
+  // Returns true when UpdateQueryResults() is ready to be called.
+  virtual bool IsAvailable(GPUTimingImpl* gpu_timing) = 0;
+
+  // Fills out query result start and end, called after IsAvailable() is true.
+  virtual void UpdateQueryResults(GPUTimingImpl* gpu_timing) = 0;
+
+  // Called when Query is next in line, used to transition states.
+  virtual void PrepareNextUpdate(scoped_refptr<TimerQuery> prev) {}
+
+  uint32_t timer_query_id_ = 0;
+  int64_t time_stamp_ = 0; // Timestamp of the query, could be estimated.
+
+ protected:
+  friend class base::RefCounted<TimerQuery>;
+  virtual ~TimerQuery();
+  DISALLOW_COPY_AND_ASSIGN(TimerQuery);
+};
+
+TimerQuery::TimerQuery(uint32_t next_id)
+    : timer_query_id_(next_id) {
+}
+
+TimerQuery::~TimerQuery() {
+}
+
+class TimeElapsedTimerQuery : public TimerQuery {
+ public:
+  TimeElapsedTimerQuery(GPUTimingImpl* gpu_timing, uint32_t next_id)
+      : TimerQuery(next_id) {
+    glGenQueries(1, &gl_query_id_);
+  }
+
+  void Destroy() override {
+    glDeleteQueries(1, &gl_query_id_);
+  }
+
+  scoped_refptr<QueryResult> StartQuery(GPUTimingImpl* gpu_timing) {
+    DCHECK(query_result_start_.get() == nullptr);
+    query_begin_cpu_time_ = gpu_timing->GetCurrentCPUTime();
+    if (gpu_timing->GetElapsedQueryCount() == 0) {
+      first_top_level_query_ = true;
+    } else {
+      // Stop the current timer query.
+      glEndQuery(GL_TIME_ELAPSED);
+    }
+
+    // Begin a new time elapsed query.
+    glBeginQuery(GL_TIME_ELAPSED, gl_query_id_);
+    query_result_start_ = new QueryResult();
+
+    // Update GPUTiming state.
+    gpu_timing->SetLastElapsedQuery(this);
+    gpu_timing->IncElapsedQueryCount();
+
+    return query_result_start_;
+  }
+
+  void EndQuery(GPUTimingImpl* gpu_timing,
+                scoped_refptr<QueryResult> result) {
+    DCHECK(gpu_timing->GetElapsedQueryCount() != 0);
+
+    scoped_refptr<TimeElapsedTimerQuery> last_query =
+        gpu_timing->GetLastElapsedQuery();
+    DCHECK(last_query.get());
+    DCHECK(last_query->query_result_end_.get() == nullptr);
+
+    last_query->query_result_end_ = result;
+    gpu_timing->DecElapsedQueryCount();
+
+    if (gpu_timing->GetElapsedQueryCount() != 0) {
+      // Continue timer if there are still ongoing queries.
+      glEndQuery(GL_TIME_ELAPSED);
+      glBeginQuery(GL_TIME_ELAPSED, gl_query_id_);
+      gpu_timing->SetLastElapsedQuery(this);
+    } else {
+      // Simply end the query and reset the current offset
+      glEndQuery(GL_TIME_ELAPSED);
+      gpu_timing->SetLastElapsedQuery(nullptr);
+    }
+  }
+
+  // Returns true when UpdateQueryResults() is ready to be called.
+  bool IsAvailable(GPUTimingImpl* gpu_timing) override {
+    if (gpu_timing->GetElapsedQueryCount() != 0 &&
+        gpu_timing->GetLastElapsedQuery() == this) {
+      // Cannot ask GL whether the result is available before the query has
+      // ended. Only one GL query runs at a time, so the only un-ended query
+      // is the last one, and only while the ongoing query counter is not 0.
+      return false;
+    }
+
+    GLint done = 0;
+    glGetQueryObjectiv(gl_query_id_, GL_QUERY_RESULT_AVAILABLE, &done);
+    return !!done;
+  }
+
+  // Fills out query result start and end, called after IsAvailable() is true.
+  void UpdateQueryResults(GPUTimingImpl* gpu_timing) override {
+    DCHECK(IsAvailable(gpu_timing));
+
+    GLuint64 result_value = 0;
+    glGetQueryObjectui64v(gl_query_id_, GL_QUERY_RESULT, &result_value);
+    const int64_t micro_results = NanoToMicro(result_value);
+
+    // Adjust prev query end time if it is before the current max.
+    const int64_t start_time =
+        std::max(first_top_level_query_ ? query_begin_cpu_time_ : 0,
+                 std::max(prev_query_end_time_,
+                          gpu_timing->GetMaxTimeStamp()));
+
+    // As a sanity check, if the result value is greater than the time
+    // allotted, we can safely say this is garbage data.
+    const int64_t max_possible_time =
+        gpu_timing->GetCurrentCPUTime() - query_begin_cpu_time_;
+    if (micro_results > max_possible_time) {
+      gpu_timing->HandleBadQuery();
+    }
+
+    // Elapsed queries need to be adjusted so they are relative to one another.
+    // Absolute timer queries are already relative to one another absolutely.
+    time_stamp_ = start_time + micro_results;
+
+    if (query_result_start_.get()) {
+      query_result_start_->SetStartValue(start_time);
+    }
+    if (query_result_end_.get()) {
+      query_result_end_->SetEndValue(time_stamp_);
+    }
+  }
+
+  // Called when Query is next in line, used to transition states.
+  void PrepareNextUpdate(scoped_refptr<TimerQuery> prev) override {
+    prev_query_end_time_ = prev->time_stamp_;
+  }
+
+ private:
+  ~TimeElapsedTimerQuery() override {}
+
+  bool first_top_level_query_ = false;
+  uint32_t gl_query_id_ = 0;
+  int64_t prev_query_end_time_ = 0;
+  int64_t query_begin_cpu_time_ = 0;
+  scoped_refptr<QueryResult> query_result_start_;
+  scoped_refptr<QueryResult> query_result_end_;
+};
+
+class TimeStampTimerQuery : public TimerQuery {
+ public:
+  TimeStampTimerQuery(uint32_t next_id)
+      : TimerQuery(next_id) {
+    glGenQueries(1, &gl_query_id_);
+  }
+
+  void Destroy() override {
+    glDeleteQueries(1, &gl_query_id_);
+  }
+
+  scoped_refptr<QueryResult> DoQuery() {
+    glQueryCounter(gl_query_id_, GL_TIMESTAMP);
+    query_result_ = new QueryResult();
+    return query_result_;
+  }
+
+  // Returns true when UpdateQueryResults() is ready to be called.
+  bool IsAvailable(GPUTimingImpl* gpu_timing) override {
+    GLint done = 0;
+    glGetQueryObjectiv(gl_query_id_, GL_QUERY_RESULT_AVAILABLE, &done);
+    return !!done;
+  }
+
+  // Fills out query result start and end, called after IsAvailable() is true.
+  void UpdateQueryResults(GPUTimingImpl* gpu_timing) override {
+    DCHECK(IsAvailable(gpu_timing));
+
+    GLuint64 result_value = 0;
+    glGetQueryObjectui64v(gl_query_id_, GL_QUERY_RESULT, &result_value);
+    const int64_t micro_results = NanoToMicro(result_value);
+
+    const int64 offset = gpu_timing->CalculateTimerOffset();
+    const int64_t adjusted_result = micro_results + offset;
+    DCHECK(query_result_.get());
+    query_result_->SetStartValue(adjusted_result);
+    query_result_->SetEndValue(adjusted_result);
+    time_stamp_ = adjusted_result;
+  }
+
+ private:
+  ~TimeStampTimerQuery() override {}
+  uint32_t gl_query_id_ = 0;
+  scoped_refptr<QueryResult> query_result_;
+};
+
+GPUTimingImpl::GPUTimingImpl(GLContextReal* context) {
   DCHECK(context);
   const GLVersionInfo* version_info = context->GetVersionInfo();
   DCHECK(version_info);
   if (version_info->is_es3 &&  // glGetInteger64v is supported under ES3.
     context->HasExtension("GL_EXT_disjoint_timer_query")) {
-    timer_type_ = kTimerTypeDisjoint;
+    timer_type_ = GPUTiming::kTimerTypeDisjoint;
   } else if (context->HasExtension("GL_ARB_timer_query")) {
-    timer_type_ = kTimerTypeARB;
+    timer_type_ = GPUTiming::kTimerTypeARB;
   } else if (context->HasExtension("GL_EXT_timer_query")) {
-    timer_type_ = kTimerTypeEXT;
+    timer_type_ = GPUTiming::kTimerTypeEXT;
   }
 }
 
-GPUTiming::~GPUTiming() {
-}
-
-scoped_refptr<GPUTimingClient> GPUTiming::CreateGPUTimingClient() {
-  return new GPUTimingClient(this);
+GPUTimingImpl::~GPUTimingImpl() {
 }
 
-uint32_t GPUTiming::GetDisjointCount() {
-  if (timer_type_ == kTimerTypeDisjoint) {
+uint32_t GPUTimingImpl::GetDisjointCount() {
+  if (timer_type_ == GPUTiming::kTimerTypeDisjoint) {
     GLint disjoint_value = 0;
     glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint_value);
     if (disjoint_value) {
@@ -44,19 +333,16 @@ uint32_t GPUTiming::GetDisjointCount() {
   return disjoint_counter_;
 }
 
-int64 GPUTiming::CalculateTimerOffset(base::Callback<int64(void)> cpu_time) {
+int64 GPUTimingImpl::CalculateTimerOffset() {
   if (!offset_valid_) {
-    if (timer_type_ == kTimerTypeDisjoint || timer_type_ == kTimerTypeARB) {
+    if (timer_type_ == GPUTiming::kTimerTypeDisjoint ||
+        timer_type_ == GPUTiming::kTimerTypeARB) {
       GLint64 gl_now = 0;
       glGetInteger64v(GL_TIMESTAMP, &gl_now);
-      int64 now =
-          cpu_time.is_null()
-          ? (base::TraceTicks::Now() - base::TraceTicks()).InMicroseconds()
-          : cpu_time.Run();
-      offset_ = now - gl_now / base::Time::kNanosecondsPerMicrosecond;
-      offset_valid_ = (timer_type_ == kTimerTypeARB);
+      int64_t micro_now = NanoToMicro(gl_now);
+      offset_ = GetCurrentCPUTime() - micro_now;
+      offset_valid_ = (timer_type_ == GPUTiming::kTimerTypeARB);
     } else {
-      // TODO(dyen): figure out how to calculate the offset for EXT_Timer_Query.
       offset_ = 0;
       offset_valid_ = true;
     }
@@ -64,131 +350,193 @@ int64 GPUTiming::CalculateTimerOffset(base::Callback<int64(void)> cpu_time) {
   return offset_;
 }
 
-void GPUTiming::InvalidateTimerOffset() {
+scoped_refptr<QueryResult> GPUTimingImpl::BeginElapsedTimeQuery() {
+  DCHECK(timer_type_ != GPUTiming::kTimerTypeInvalid);
+
+  queries_.push_back(new TimeElapsedTimerQuery(this, next_timer_query_id_++));
+  return static_cast<TimeElapsedTimerQuery*>(
+      queries_.back().get())->StartQuery(this);
+}
+
+void GPUTimingImpl::EndElapsedTimeQuery(scoped_refptr<QueryResult> result) {
+  DCHECK(timer_type_ != GPUTiming::kTimerTypeInvalid);
+  DCHECK(result.get());
+
+  if (GetElapsedQueryCount() > 1) {
+    // Create new elapsed timer query if there are still ongoing queries.
+    queries_.push_back(new TimeElapsedTimerQuery(this,
+                                                 next_timer_query_id_++));
+    static_cast<TimeElapsedTimerQuery*>(
+        queries_.back().get())->EndQuery(this, result);
+  } else {
+    // Simply end the query and reset the current offset
+    DCHECK(GetLastElapsedQuery().get());
+    GetLastElapsedQuery()->EndQuery(this, result);
+    DCHECK(GetLastElapsedQuery().get() == nullptr);
+  }
+}
+
+scoped_refptr<QueryResult> GPUTimingImpl::DoTimeStampQuery() {
+  DCHECK(timer_type_ == GPUTiming::kTimerTypeDisjoint ||
+         timer_type_ == GPUTiming::kTimerTypeARB);
+
+  if (force_time_elapsed_query_) {
+    // Replace with elapsed timer queries instead.
+    scoped_refptr<QueryResult> result = BeginElapsedTimeQuery();
+    EndElapsedTimeQuery(result);
+    return result;
+  }
+
+  queries_.push_back(new TimeStampTimerQuery(next_timer_query_id_++));
+  return static_cast<TimeStampTimerQuery*>(queries_.back().get())->DoQuery();
+}
+
+void GPUTimingImpl::UpdateQueryResults() {
+  // Query availability of and count the queries that are available.
+  int available_queries = 0;
+  for (const scoped_refptr<TimerQuery>& query : queries_) {
+    if (!query->IsAvailable(this))
+      break;
+    available_queries++;
+  }
+
+  // Check for disjoints, this must be done after we checked for availability.
+  const uint32_t disjoint_counter = GetDisjointCount();
+  if (disjoint_counter != query_disjoint_count_) {
+    next_good_timer_query_id_ = next_timer_query_id_;
+    query_disjoint_count_ = disjoint_counter;
+  }
+
+  // Fill in the query result data once we know the disjoint value is updated.
+  // Note that even if a disjoint happened and the values may or may not be
+  // garbage, we still fill them in and let GPUTimingClients detect and discard
+  // bad query data. The only thing we need to account for here is to not
+  // use garbage timer data to fill states such as max query times.
+  for (int i = 0; i < available_queries; ++i) {
+    scoped_refptr<TimerQuery> query = queries_.front();
+
+    query->UpdateQueryResults(this);
+    DCHECK(query->time_stamp_) << "Query Timestamp was not updated.";
+
+    // For good queries, keep track of the max valid time stamps.
+    if (IsGoodQueryID(query->timer_query_id_))
+      UpdateMaxTimeStamp(query->time_stamp_);
+
+    query->Destroy();
+    queries_.pop_front();
+
+    if (!queries_.empty())
+      queries_.front()->PrepareNextUpdate(query);
+  }
+}
+
+void GPUTimingImpl::SetLastElapsedQuery(
+    scoped_refptr<TimeElapsedTimerQuery> query) {
+  last_elapsed_query_ = query;
+}
+
+scoped_refptr<TimeElapsedTimerQuery> GPUTimingImpl::GetLastElapsedQuery() {
+  return last_elapsed_query_;
+}
+
+void GPUTimingImpl::HandleBadQuery() {
+  // Mark all queries as bad and signal an artificial disjoint value.
+  next_good_timer_query_id_ = next_timer_query_id_;
   offset_valid_ = false;
+  query_disjoint_count_ = ++disjoint_counter_;
+}
+
+bool GPUTimingImpl::IsGoodQueryID(uint32_t query_id) {
+  return query_id >= next_good_timer_query_id_;
+}
+
+scoped_refptr<GPUTimingClient> GPUTimingImpl::CreateGPUTimingClient() {
+  return new GPUTimingClient(this);
+}
+
+GPUTiming* GPUTiming::CreateGPUTiming(GLContextReal* context) {
+  return new GPUTimingImpl(context);
+}
+
+GPUTiming::GPUTiming() {
+}
+
+GPUTiming::~GPUTiming() {
 }
 
 GPUTimer::~GPUTimer() {
-  // Destroy() must be called before the destructor.
-  DCHECK(queries_[0] == 0);
-  DCHECK(queries_[1] == 0);
 }
 
 void GPUTimer::Destroy(bool have_context) {
   if (have_context) {
-    glDeleteQueries(2, queries_);
+    if (!end_requested_) {
+      DCHECK(gpu_timing_client_->gpu_timing_);
+      DCHECK(elapsed_timer_result_.get());
+      gpu_timing_client_->gpu_timing_->EndElapsedTimeQuery(
+          elapsed_timer_result_);
+    }
   }
-  memset(queries_, 0, sizeof(queries_));
 }
 
 void GPUTimer::Start() {
-  switch (gpu_timing_client_->gpu_timing_->timer_type_) {
-    case GPUTiming::kTimerTypeARB:
-    case GPUTiming::kTimerTypeDisjoint:
-      // GL_TIMESTAMP and GL_TIMESTAMP_EXT both have the same value.
-      glQueryCounter(queries_[0], GL_TIMESTAMP);
-      break;
-    case GPUTiming::kTimerTypeEXT:
-      glBeginQuery(GL_TIME_ELAPSED_EXT, queries_[0]);
-      break;
-    default:
-      NOTREACHED();
-  }
+  DCHECK(gpu_timing_client_->gpu_timing_);
+  if (!use_elapsed_timer_)
+    time_stamp_result_ = gpu_timing_client_->gpu_timing_->DoTimeStampQuery();
+
+  elapsed_timer_result_ =
+      gpu_timing_client_->gpu_timing_->BeginElapsedTimeQuery();
 }
 
 void GPUTimer::End() {
+  DCHECK(elapsed_timer_result_.get());
   end_requested_ = true;
-  DCHECK(gpu_timing_client_->gpu_timing_);
-  switch (gpu_timing_client_->gpu_timing_->timer_type_) {
-    case GPUTiming::kTimerTypeARB:
-    case GPUTiming::kTimerTypeDisjoint:
-      glQueryCounter(queries_[1], GL_TIMESTAMP);
-      break;
-    case GPUTiming::kTimerTypeEXT:
-      glEndQuery(GL_TIME_ELAPSED_EXT);
-      break;
-    default:
-      NOTREACHED();
-  }
+  gpu_timing_client_->gpu_timing_->EndElapsedTimeQuery(elapsed_timer_result_);
 }
 
 bool GPUTimer::IsAvailable() {
-  if (!gpu_timing_client_->IsAvailable() || !end_requested_)
+  if (!end_requested_)
     return false;
-
   if (!end_available_) {
-    GLint done = 0;
-    glGetQueryObjectiv(queries_[1] ? queries_[1] : queries_[0],
-                       GL_QUERY_RESULT_AVAILABLE, &done);
-    if (done) {
+    DCHECK(elapsed_timer_result_.get());
+    if (elapsed_timer_result_->IsAvailable()) {
       end_available_ = true;
-      offset_ = gpu_timing_client_->CalculateTimerOffset();
+    } else {
+      gpu_timing_client_->gpu_timing_->UpdateQueryResults();
+      end_available_ = elapsed_timer_result_->IsAvailable();
     }
   }
-
   return end_available_;
 }
 
 void GPUTimer::GetStartEndTimestamps(int64* start, int64* end) {
   DCHECK(start && end);
+  DCHECK(elapsed_timer_result_.get());
   DCHECK(IsAvailable());
-  DCHECK(gpu_timing_client_->gpu_timing_);
-  DCHECK(gpu_timing_client_->gpu_timing_->timer_type_ !=
-         GPUTiming::kTimerTypeEXT);
-  GLuint64 begin_stamp = 0;
-  GLuint64 end_stamp = 0;
-  // TODO(dsinclair): It's possible for the timer to wrap during the start/end.
-  // We need to detect if the end is less then the start and correct for the
-  // wrapping.
-  glGetQueryObjectui64v(queries_[0], GL_QUERY_RESULT, &begin_stamp);
-  glGetQueryObjectui64v(queries_[1], GL_QUERY_RESULT, &end_stamp);
-
-  *start = (begin_stamp / base::Time::kNanosecondsPerMicrosecond) + offset_;
-  *end = (end_stamp / base::Time::kNanosecondsPerMicrosecond) + offset_;
+  if (time_stamp_result_.get()) {
+    DCHECK(time_stamp_result_->IsAvailable());
+    const int64_t time_stamp = time_stamp_result_->GetStartValue();
+    *start = time_stamp;
+    *end = time_stamp + elapsed_timer_result_->GetDelta();
+  } else {
+    // Use estimation from elapsed timer results.
+    *start = elapsed_timer_result_->GetStartValue();
+    *end = elapsed_timer_result_->GetEndValue();
+  }
 }
 
 int64 GPUTimer::GetDeltaElapsed() {
-  DCHECK(gpu_timing_client_->gpu_timing_);
-  switch (gpu_timing_client_->gpu_timing_->timer_type_) {
-    case GPUTiming::kTimerTypeARB:
-    case GPUTiming::kTimerTypeDisjoint: {
-      int64 start = 0;
-      int64 end = 0;
-      GetStartEndTimestamps(&start, &end);
-      return end - start;
-    } break;
-    case GPUTiming::kTimerTypeEXT: {
-      GLuint64 delta = 0;
-      glGetQueryObjectui64v(queries_[0], GL_QUERY_RESULT, &delta);
-      return static_cast<int64>(delta / base::Time::kNanosecondsPerMicrosecond);
-    } break;
-    default:
-      NOTREACHED();
-  }
-  return 0;
+  DCHECK(elapsed_timer_result_.get());
+  DCHECK(IsAvailable());
+  return elapsed_timer_result_->GetDelta();
 }
 
-GPUTimer::GPUTimer(scoped_refptr<GPUTimingClient> gpu_timing_client)
-    : gpu_timing_client_(gpu_timing_client) {
-  DCHECK(gpu_timing_client_);
-  memset(queries_, 0, sizeof(queries_));
-  int queries = 0;
-  DCHECK(gpu_timing_client_->gpu_timing_);
-  switch (gpu_timing_client_->gpu_timing_->timer_type_) {
-    case GPUTiming::kTimerTypeARB:
-    case GPUTiming::kTimerTypeDisjoint:
-      queries = 2;
-      break;
-    case GPUTiming::kTimerTypeEXT:
-      queries = 1;
-      break;
-    default:
-      NOTREACHED();
-  }
-  glGenQueries(queries, queries_);
+GPUTimer::GPUTimer(scoped_refptr<GPUTimingClient> gpu_timing_client,
+                   bool use_elapsed_timer)
+    : use_elapsed_timer_(use_elapsed_timer),
+      gpu_timing_client_(gpu_timing_client) {
 }
 
-GPUTimingClient::GPUTimingClient(GPUTiming* gpu_timing)
+GPUTimingClient::GPUTimingClient(GPUTimingImpl* gpu_timing)
     : gpu_timing_(gpu_timing) {
   if (gpu_timing) {
     timer_type_ = gpu_timing->GetTimerType();
@@ -196,19 +544,18 @@ GPUTimingClient::GPUTimingClient(GPUTiming* gpu_timing)
   }
 }
 
-scoped_ptr<GPUTimer> GPUTimingClient::CreateGPUTimer() {
-  return make_scoped_ptr(new GPUTimer(this));
+scoped_ptr<GPUTimer> GPUTimingClient::CreateGPUTimer(bool prefer_elapsed_time) {
+  prefer_elapsed_time |= (timer_type_ == GPUTiming::kTimerTypeEXT);
+  if (gpu_timing_)
+    prefer_elapsed_time |= gpu_timing_->IsForceTimeElapsedQuery();
+
+  return make_scoped_ptr(new GPUTimer(this, prefer_elapsed_time));
 }
 
 bool GPUTimingClient::IsAvailable() {
   return timer_type_ != GPUTiming::kTimerTypeInvalid;
 }
 
-bool GPUTimingClient::IsTimerOffsetAvailable() {
-  return timer_type_ == GPUTiming::kTimerTypeARB ||
-         timer_type_ == GPUTiming::kTimerTypeDisjoint;
-}
-
 const char* GPUTimingClient::GetTimerTypeName() const {
   switch (timer_type_) {
     case GPUTiming::kTimerTypeDisjoint:
@@ -233,28 +580,25 @@ bool GPUTimingClient::CheckAndResetTimerErrors() {
   return false;
 }
 
-int64 GPUTimingClient::CalculateTimerOffset() {
-  DCHECK(IsTimerOffsetAvailable());
-  return gpu_timing_
-         ? gpu_timing_->CalculateTimerOffset(cpu_time_for_testing_)
-         : 0;
-}
-
-void GPUTimingClient::InvalidateTimerOffset() {
-  if (gpu_timing_) {
-    gpu_timing_->InvalidateTimerOffset();
-  }
+int64 GPUTimingClient::GetCurrentCPUTime() {
+  DCHECK(gpu_timing_);
+  return gpu_timing_->GetCurrentCPUTime();
 }
 
 void GPUTimingClient::SetCpuTimeForTesting(
     const base::Callback<int64(void)>& cpu_time) {
-  cpu_time_for_testing_ = cpu_time;
+  DCHECK(gpu_timing_);
+  gpu_timing_->SetCpuTimeForTesting(cpu_time);
 }
 
-int64 GPUTimingClient::GetCurrentCPUTime() {
-  return cpu_time_for_testing_.is_null()
-         ? (base::TraceTicks::Now() - base::TraceTicks()).InMicroseconds()
-         : cpu_time_for_testing_.Run();
+bool GPUTimingClient::IsForceTimeElapsedQuery() {
+  DCHECK(gpu_timing_);
+  return gpu_timing_->IsForceTimeElapsedQuery();
+}
+
+void GPUTimingClient::ForceTimeElapsedQuery() {
+  DCHECK(gpu_timing_);
+  gpu_timing_->ForceTimeElapsedQuery();
 }
 
 GPUTimingClient::~GPUTimingClient() {
diff --git a/ui/gl/gpu_timing.h b/ui/gl/gpu_timing.h
index 6d1ccedabf93..d8e3dc25a155 100644
--- a/ui/gl/gpu_timing.h
+++ b/ui/gl/gpu_timing.h
@@ -5,6 +5,8 @@
 #ifndef UI_GL_GPU_TIMING_H_
 #define UI_GL_GPU_TIMING_H_
 
+#include <queue>
+
 #include "base/callback.h"
 #include "base/memory/scoped_ptr.h"
 #include "ui/gl/gl_export.h"
@@ -41,7 +43,12 @@
 namespace gfx {
 
 class GLContextReal;
+class GPUTiming;
 class GPUTimingClient;
+class GPUTimingImpl;
+class QueryResult;
+class TimeElapsedTimerQuery;
+class TimerQuery;
 
 class GPUTiming {
  public:
@@ -53,25 +60,17 @@ class GPUTiming {
     kTimerTypeDisjoint  // EXT_disjoint_timer_query
   };
 
-  TimerType GetTimerType() const { return timer_type_; }
-  uint32_t GetDisjointCount();
-
-  int64 CalculateTimerOffset(base::Callback<int64(void)> cpu_time);
-  void InvalidateTimerOffset();
-
- private:
+ protected:
   friend struct base::DefaultDeleter<GPUTiming>;
   friend class GLContextReal;
-  friend class GPUTimer;
-  explicit GPUTiming(GLContextReal* context);
-  ~GPUTiming();
 
-  scoped_refptr<GPUTimingClient> CreateGPUTimingClient();
+  static GPUTiming* CreateGPUTiming(GLContextReal* context);
+
+  GPUTiming();
+  virtual ~GPUTiming();
+
+  virtual scoped_refptr<GPUTimingClient> CreateGPUTimingClient() = 0;
 
-  TimerType timer_type_ = kTimerTypeInvalid;
-  uint32_t disjoint_counter_ = 0;
-  int64 offset_ = 0;  // offset cache when timer_type_ == kTimerTypeARB
-  bool offset_valid_ = false;
   DISALLOW_COPY_AND_ASSIGN(GPUTiming);
 };
 
@@ -79,31 +78,38 @@ class GPUTiming {
 // complete a set of GL commands
 class GL_EXPORT GPUTimer {
  public:
+  static void DisableTimestampQueries();
+
   ~GPUTimer();
 
   // Destroy the timer object. This must be explicitly called before destroying
   // this object.
   void Destroy(bool have_context);
 
+  // Start an instant timer; its start and end will be equal.
+  void Instant();
+
+  // Start a timer range.
   void Start();
   void End();
+
   bool IsAvailable();
 
   void GetStartEndTimestamps(int64* start, int64* end);
   int64 GetDeltaElapsed();
 
-  int64 GetOffset() const { return offset_; }
-
  private:
   friend class GPUTimingClient;
 
-  explicit GPUTimer(scoped_refptr<GPUTimingClient> gpu_timing_client);
+  explicit GPUTimer(scoped_refptr<GPUTimingClient> gpu_timing_client,
+                    bool use_elapsed_timer);
 
-  unsigned int queries_[2];
-  int64 offset_ = 0;
+  bool use_elapsed_timer_ = false;
   bool end_requested_ = false;
   bool end_available_ = false;
   scoped_refptr<GPUTimingClient> gpu_timing_client_;
+  scoped_refptr<QueryResult> time_stamp_result_;
+  scoped_refptr<QueryResult> elapsed_timer_result_;
 
   DISALLOW_COPY_AND_ASSIGN(GPUTimer);
 };
@@ -113,11 +119,10 @@ class GL_EXPORT GPUTimer {
 class GL_EXPORT GPUTimingClient
     : public base::RefCounted<GPUTimingClient> {
  public:
-  explicit GPUTimingClient(GPUTiming* gpu_timing = nullptr);
+  explicit GPUTimingClient(GPUTimingImpl* gpu_timing = nullptr);
 
-  scoped_ptr<GPUTimer> CreateGPUTimer();
+  scoped_ptr<GPUTimer> CreateGPUTimer(bool prefer_elapsed_time);
   bool IsAvailable();
-  bool IsTimerOffsetAvailable();
 
   const char* GetTimerTypeName() const;
 
@@ -127,25 +132,22 @@ class GL_EXPORT GPUTimingClient
   // If the returned value is false, all the previous timers should be
   // discarded.
   bool CheckAndResetTimerErrors();
-  void InvalidateTimerOffset();
 
-  void SetCpuTimeForTesting(const base::Callback<int64(void)>& cpu_time);
   int64 GetCurrentCPUTime();
+  void SetCpuTimeForTesting(const base::Callback<int64(void)>& cpu_time);
+
+  bool IsForceTimeElapsedQuery();
+  void ForceTimeElapsedQuery();
 
  private:
   friend class base::RefCounted<GPUTimingClient>;
   friend class GPUTimer;
-  friend class GPUTiming;
 
   virtual ~GPUTimingClient();
 
-  // Returns the offset between the current gpu time and the cpu time.
-  int64 CalculateTimerOffset();
-
-  GPUTiming* gpu_timing_;
+  GPUTimingImpl* gpu_timing_;
   GPUTiming::TimerType timer_type_ = GPUTiming::kTimerTypeInvalid;
   uint32_t disjoint_counter_ = 0;
-  base::Callback<int64(void)> cpu_time_for_testing_;
 
   DISALLOW_COPY_AND_ASSIGN(GPUTimingClient);
 };
diff --git a/ui/gl/gpu_timing_fake.cc b/ui/gl/gpu_timing_fake.cc
index 486a0c963a93..6a1e540c6df7 100644
--- a/ui/gl/gpu_timing_fake.cc
+++ b/ui/gl/gpu_timing_fake.cc
@@ -55,33 +55,28 @@ void GPUTimingFake::ExpectNoDisjointCalls(MockGLInterface& gl) {
 
 void GPUTimingFake::ExpectGPUTimerQuery(
     MockGLInterface& gl, bool elapsed_query) {
-  if (elapsed_query) {
-    // Currently do not support elapsed queries.
-    return;
-  }
-
-  EXPECT_CALL(gl, GenQueries(2, NotNull())).Times(AtLeast(1))
+  EXPECT_CALL(gl, GenQueries(1, NotNull())).Times(AtLeast(2))
       .WillRepeatedly(Invoke(this, &GPUTimingFake::FakeGLGenQueries));
 
-  if (elapsed_query) {
-    // Time Elapsed based queries.
-    EXPECT_CALL(gl, BeginQuery(GL_TIME_ELAPSED, _))
-        .WillRepeatedly(
-            Invoke(this, &GPUTimingFake::FakeGLBeginQuery));
-
-    EXPECT_CALL(gl, EndQuery(GL_TIME_ELAPSED))
-        .WillRepeatedly(Invoke(this, &GPUTimingFake::FakeGLEndQuery));
-  } else {
+  if (!elapsed_query) {
     // Time Stamp based queries.
     EXPECT_CALL(gl, GetInteger64v(GL_TIMESTAMP, _))
         .WillRepeatedly(
             Invoke(this, &GPUTimingFake::FakeGLGetInteger64v));
 
-    EXPECT_CALL(gl, QueryCounter(_, GL_TIMESTAMP)).Times(AtLeast(2))
+    EXPECT_CALL(gl, QueryCounter(_, GL_TIMESTAMP)).Times(AtLeast(1))
         .WillRepeatedly(
              Invoke(this, &GPUTimingFake::FakeGLQueryCounter));
   }
 
+  // Time Elapsed based queries.
+  EXPECT_CALL(gl, BeginQuery(GL_TIME_ELAPSED, _))
+      .WillRepeatedly(
+          Invoke(this, &GPUTimingFake::FakeGLBeginQuery));
+
+  EXPECT_CALL(gl, EndQuery(GL_TIME_ELAPSED))
+      .WillRepeatedly(Invoke(this, &GPUTimingFake::FakeGLEndQuery));
+
   EXPECT_CALL(gl, GetQueryObjectiv(_, GL_QUERY_RESULT_AVAILABLE,
                                         NotNull()))
       .WillRepeatedly(
@@ -91,7 +86,7 @@ void GPUTimingFake::ExpectGPUTimerQuery(
       .WillRepeatedly(
            Invoke(this, &GPUTimingFake::FakeGLGetQueryObjectui64v));
 
-  EXPECT_CALL(gl, DeleteQueries(2, NotNull())).Times(AtLeast(1))
+  EXPECT_CALL(gl, DeleteQueries(1, NotNull())).Times(AtLeast(2))
       .WillRepeatedly(
            Invoke(this, &GPUTimingFake::FakeGLDeleteQueries));
 }
diff --git a/ui/gl/gpu_timing_unittest.cc b/ui/gl/gpu_timing_unittest.cc
index a567ce6363c1..9044394d697a 100644
--- a/ui/gl/gpu_timing_unittest.cc
+++ b/ui/gl/gpu_timing_unittest.cc
@@ -19,6 +19,8 @@ class GPUTimingTest : public testing::Test {
   void SetUp() override {
     setup_ = false;
     fake_cpu_time_ = 0;
+
+    CreateGPUTimingClient()->SetCpuTimeForTesting(base::Bind(&GetFakeCPUTime));
   }
 
   void TearDown() override {
@@ -70,7 +72,6 @@ TEST_F(GPUTimingTest, FakeTimerTest) {
   SetFakeCPUTime(123);
 
   scoped_refptr<GPUTimingClient> gpu_timing_client = CreateGPUTimingClient();
-  gpu_timing_client->SetCpuTimeForTesting(base::Bind(&GetFakeCPUTime));
   EXPECT_EQ(123, gpu_timing_client->GetCurrentCPUTime());
 
   base::Callback<int64_t(void)> empty;
@@ -78,4 +79,20 @@ TEST_F(GPUTimingTest, FakeTimerTest) {
   EXPECT_NE(123, gpu_timing_client->GetCurrentCPUTime());
 }
 
+TEST_F(GPUTimingTest, ForceTimeElapsedQuery) {
+  // Forcing time elapsed queries via one client must affect every client.
+  scoped_refptr<GPUTimingClient> client1 = CreateGPUTimingClient();
+  EXPECT_FALSE(client1->IsForceTimeElapsedQuery());
+
+  scoped_refptr<GPUTimingClient> client_force = CreateGPUTimingClient();
+  EXPECT_FALSE(client1->IsForceTimeElapsedQuery());
+  EXPECT_FALSE(client_force->IsForceTimeElapsedQuery());
+  client_force->ForceTimeElapsedQuery();
+  EXPECT_TRUE(client1->IsForceTimeElapsedQuery());
+  EXPECT_TRUE(client_force->IsForceTimeElapsedQuery());
+
+  scoped_refptr<GPUTimingClient> client2 = CreateGPUTimingClient();
+  EXPECT_TRUE(client2->IsForceTimeElapsedQuery());
+}
+
 }  // namespace gpu
-- 
2.11.4.GIT