Pin Chrome's shortcut to the Win10 Start menu on install and OS upgrade.
[chromium-blink-merge.git] / native_client_sdk / src / examples / demo / life_simd / life.cc
blobb99f2bc934f2d55d122522dc3f220e5e18a18034
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <assert.h>
6 #include <math.h>
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <sys/time.h>
12 #include <unistd.h>
14 #include <ppapi/c/ppb_input_event.h>
15 #include <ppapi/cpp/fullscreen.h>
16 #include <ppapi/cpp/input_event.h>
17 #include <ppapi/cpp/instance_handle.h>
18 #include <ppapi/cpp/var.h>
19 #include <ppapi/cpp/var_array.h>
20 #include <ppapi/cpp/var_array_buffer.h>
21 #include <ppapi/cpp/var_dictionary.h>
23 #include "ppapi_simple/ps.h"
24 #include "ppapi_simple/ps_context_2d.h"
25 #include "ppapi_simple/ps_event.h"
26 #include "ppapi_simple/ps_instance.h"
27 #include "ppapi_simple/ps_interface.h"
28 #include "ppapi_simple/ps_main.h"
29 #include "sdk_util/macros.h"
30 #include "sdk_util/thread_pool.h"
32 using namespace sdk_util; // For sdk_util::ThreadPool
34 namespace {
// Forces the annotated function to be inlined at every call site.
#define INLINE inline __attribute__((always_inline))

// BGRA helper macro, for constructing a pixel for a BGRA buffer.
// Every channel is widened to uint32_t before it is shifted: with plain int
// arithmetic an alpha of 0xFF would be shifted into the sign bit, yielding a
// negative value that cannot brace-initialize a uint32_t table without a
// narrowing conversion.
#define MakeBGRA(b, g, r, a)                                              \
  ((static_cast<uint32_t>(a) << 24) | (static_cast<uint32_t>(r) << 16) | \
   (static_cast<uint32_t>(g) << 8) | static_cast<uint32_t>(b))
// Number of simulation frames executed back-to-back for one benchmark run.
const int kFramesToBenchmark = 100;

// Alignment, in bytes, requested for the cell buffers; the row stride of the
// cell buffers is rounded up to a multiple of this value.
const int kCellAlignment = 16;
45 // 128 bit vector types
46 typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));
48 // Helper function to broadcast x across 16 element vector.
49 INLINE u8x16_t broadcast(uint8_t x) {
50 u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
51 return r;
54 // Convert a count value into a live (green) or dead color value.
55 const uint32_t kNeighborColors[] = {
56 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
57 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
58 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
59 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
60 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
61 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
62 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
63 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
64 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
65 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
66 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
67 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
68 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
69 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
70 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
71 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
72 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
73 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
// Next-generation state of a cell (1 = alive, 0 = dead), indexed by the same
// weighted neighborhood sum (0..17) that indexes the color table. The health
// is binary: only sums 5, 6 and 7 produce a live cell.
const uint8_t kIsAlive[] = {
  0, 0, 0, 0, 0,                // sums 0..4
  1, 1, 1,                      // sums 5..7
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // sums 8..17
};
// Timer helper for benchmarking.
// start_tv records the wall-clock time captured during static
// initialization; start_tv_retv is gettimeofday()'s return value for that
// capture (0 on success).
timeval start_tv;
int start_tv_retv = gettimeofday(&start_tv, NULL);

// Returns the seconds elapsed since start_tv was captured, as a double.
// Returns 0.0 if either the initial or the current gettimeofday() call
// failed. Both the seconds and the microseconds of the start time are
// subtracted, so the first call reports a value near zero rather than the
// fractional part of the current wall-clock second.
inline double getseconds() {
  const double usec_to_sec = 0.000001;
  timeval tv;
  if ((0 == start_tv_retv) && (0 == gettimeofday(&tv, NULL))) {
    return (tv.tv_sec - start_tv.tv_sec) +
           (tv.tv_usec - start_tv.tv_usec) * usec_to_sec;
  }
  return 0.0;
}
95 } // namespace
98 class Life {
99 public:
100 Life();
101 virtual ~Life();
102 // Runs a tick of the simulations, update 2D output.
103 void Update();
104 // Handle event from user, or message from JS.
105 void HandleEvent(PSEvent* ps_event);
106 private:
107 void UpdateContext();
108 void DrawCell(int32_t x, int32_t y);
109 void ProcessTouchEvent(const pp::TouchInputEvent& touches);
110 void PostUpdateMessage(const char* message, double value);
111 void StartBenchmark();
112 void EndBenchmark();
113 void Stir();
114 void wSimulate(int y);
115 static void wSimulateEntry(int y, void* data);
116 void Simulate();
118 bool simd_;
119 bool multithread_;
120 bool benchmarking_;
121 int benchmark_frame_counter_;
122 double bench_start_time_;
123 double bench_end_time_;
124 uint8_t* cell_in_;
125 uint8_t* cell_out_;
126 int32_t cell_stride_;
127 int32_t width_;
128 int32_t height_;
129 PSContext2D_t* ps_context_;
130 ThreadPool* workers_;
133 Life::Life() :
134 simd_(true),
135 multithread_(true),
136 benchmarking_(false),
137 benchmark_frame_counter_(0),
138 bench_start_time_(0.0),
139 bench_end_time_(0.0),
140 cell_in_(NULL),
141 cell_out_(NULL),
142 cell_stride_(0),
143 width_(0),
144 height_(0) {
145 ps_context_ = PSContext2DAllocate(PP_IMAGEDATAFORMAT_BGRA_PREMUL);
146 // Query system for number of processors via sysconf()
147 int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
148 if (num_threads < 2)
149 num_threads = 2;
150 workers_ = new ThreadPool(num_threads);
151 PSEventSetFilter(PSE_ALL);
154 Life::~Life() {
155 delete workers_;
156 PSContext2DFree(ps_context_);
159 void Life::UpdateContext() {
160 cell_stride_ = (ps_context_->width + kCellAlignment - 1) &
161 ~(kCellAlignment - 1);
162 size_t size = cell_stride_ * ps_context_->height;
164 if (ps_context_->width != width_ || ps_context_->height != height_) {
165 free(cell_in_);
166 free(cell_out_);
168 // Create a new context
169 void* in_buffer = NULL;
170 void* out_buffer = NULL;
171 // alloc buffers aligned on 16 bytes
172 posix_memalign(&in_buffer, kCellAlignment, size);
173 posix_memalign(&out_buffer, kCellAlignment, size);
174 cell_in_ = (uint8_t*) in_buffer;
175 cell_out_ = (uint8_t*) out_buffer;
177 memset(cell_out_, 0, size);
178 for (size_t index = 0; index < size; index++) {
179 cell_in_[index] = rand() & 1;
181 width_ = ps_context_->width;
182 height_ = ps_context_->height;
186 void Life::DrawCell(int32_t x, int32_t y) {
187 if (!cell_in_) return;
188 if (x > 0 && x < ps_context_->width - 1 &&
189 y > 0 && y < ps_context_->height - 1) {
190 cell_in_[x - 1 + y * cell_stride_] = 1;
191 cell_in_[x + 1 + y * cell_stride_] = 1;
192 cell_in_[x + (y - 1) * cell_stride_] = 1;
193 cell_in_[x + (y + 1) * cell_stride_] = 1;
197 void Life::ProcessTouchEvent(const pp::TouchInputEvent& touches) {
198 uint32_t count = touches.GetTouchCount(PP_TOUCHLIST_TYPE_TOUCHES);
199 uint32_t i, j;
200 for (i = 0; i < count; i++) {
201 pp::TouchPoint touch =
202 touches.GetTouchByIndex(PP_TOUCHLIST_TYPE_TOUCHES, i);
203 int radius = (int)(touch.radii().x());
204 int x = (int)(touch.position().x());
205 int y = (int)(touch.position().y());
206 // num = 1/100th the area of touch point
207 uint32_t num = (uint32_t)(M_PI * radius * radius / 100.0f);
208 for (j = 0; j < num; j++) {
209 int dx = rand() % (radius * 2) - radius;
210 int dy = rand() % (radius * 2) - radius;
211 // only plot random cells within the touch area
212 if (dx * dx + dy * dy <= radius * radius)
213 DrawCell(x + dx, y + dy);
218 void Life::PostUpdateMessage(const char* message_name, double value) {
219 pp::VarDictionary message;
220 message.Set("message", message_name);
221 message.Set("value", value);
222 PSInterfaceMessaging()->PostMessage(PSGetInstanceId(), message.pp_var());
225 void Life::StartBenchmark() {
226 printf("Running benchmark... (SIMD: %s, multi-threading: %s, size: %dx%d)\n",
227 simd_ ? "enabled" : "disabled",
228 multithread_ ? "enabled" : "disabled",
229 ps_context_->width,
230 ps_context_->height);
231 benchmarking_ = true;
232 bench_start_time_ = getseconds();
233 benchmark_frame_counter_ = kFramesToBenchmark;
236 void Life::EndBenchmark() {
237 double total_time;
238 bench_end_time_ = getseconds();
239 benchmarking_ = false;
240 total_time = bench_end_time_ - bench_start_time_;
241 printf("Finished - benchmark took %f seconds\n", total_time);
242 // Send benchmark result to JS.
243 PostUpdateMessage("benchmark_result", total_time);
246 void Life::HandleEvent(PSEvent* ps_event) {
247 // Give the 2D context a chance to process the event.
248 if (0 != PSContext2DHandleEvent(ps_context_, ps_event)) {
249 UpdateContext();
250 return;
253 switch(ps_event->type) {
255 case PSE_INSTANCE_HANDLEINPUT: {
256 pp::InputEvent event(ps_event->as_resource);
258 switch(event.GetType()) {
259 case PP_INPUTEVENT_TYPE_MOUSEDOWN:
260 case PP_INPUTEVENT_TYPE_MOUSEMOVE: {
261 pp::MouseInputEvent mouse = pp::MouseInputEvent(event);
262 // If the button is down, draw
263 if (mouse.GetModifiers() & PP_INPUTEVENT_MODIFIER_LEFTBUTTONDOWN) {
264 PP_Point location = mouse.GetPosition();
265 DrawCell(location.x, location.y);
267 break;
270 case PP_INPUTEVENT_TYPE_TOUCHSTART:
271 case PP_INPUTEVENT_TYPE_TOUCHMOVE: {
272 pp::TouchInputEvent touches = pp::TouchInputEvent(event);
273 ProcessTouchEvent(touches);
274 break;
277 case PP_INPUTEVENT_TYPE_KEYDOWN: {
278 pp::Fullscreen fullscreen((pp::InstanceHandle(PSGetInstanceId())));
279 bool isFullscreen = fullscreen.IsFullscreen();
280 fullscreen.SetFullscreen(!isFullscreen);
281 break;
284 default:
285 break;
287 break; // case PSE_INSTANCE_HANDLEINPUT
290 case PSE_INSTANCE_HANDLEMESSAGE: {
291 // Convert Pepper Simple message to PPAPI C++ vars
292 pp::Var var(ps_event->as_var);
293 if (var.is_dictionary()) {
294 pp::VarDictionary dictionary(var);
295 std::string message = dictionary.Get("message").AsString();
296 if (message == "run_benchmark" && !benchmarking_) {
297 StartBenchmark();
298 } else if (message == "set_simd") {
299 simd_ = dictionary.Get("value").AsBool();
300 } else if (message == "set_threading") {
301 multithread_ = dictionary.Get("value").AsBool();
304 break; // case PSE_INSTANCE_HANDLEMESSAGE
307 default:
308 break;
312 void Life::Stir() {
313 int32_t width = ps_context_->width;
314 int32_t height = ps_context_->height;
315 int32_t stride = cell_stride_;
316 int32_t i;
317 if (cell_in_ == NULL || cell_out_ == NULL)
318 return;
320 for (i = 0; i < width; ++i) {
321 cell_in_[i] = rand() & 1;
322 cell_in_[i + (height - 1) * stride] = rand() & 1;
324 for (i = 0; i < height; ++i) {
325 cell_in_[i * stride] = rand() & 1;
326 cell_in_[i * stride + (width - 1)] = rand() & 1;
// Simulates scanline y of one generation.
// Reads rows y - 1, y and y + 1 of cell_in_, writes the next state of each
// interior cell of row y to cell_out_, and writes the matching pixel color
// to row y of the 2D context buffer. When simd_ is set, as many cells as
// possible are handled 16 at a time; the scalar loop at the end finishes
// whatever is left of the scanline (or all of it when simd_ is clear).
// For every cell the code forms 2 * (sum of the 8 neighbors) + (center cell);
// a value of 5, 6 or 7 means the cell is alive in the next generation.
330 void Life::wSimulate(int y) {
331 // Don't run simulation on top and bottom borders
332 if (y < 1 || y >= ps_context_->height - 1)
333 return;
335 // Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
336 // wide perimeter is excluded from the simulation update; only cells from
337 // x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
// src0/src1/src2 point at column 0 of the rows above, at and below y, so the
// 3x3 window of cell x starts at offset x - 1 in each of them.
338 uint8_t *src0 = (cell_in_ + (y - 1) * cell_stride_);
339 uint8_t *src1 = src0 + cell_stride_;
340 uint8_t *src2 = src1 + cell_stride_;
// dst starts at column 1 of the output row, i.e. one byte past the row start.
// NOTE(review): the SIMD store through dst below is therefore offset by one
// byte from the row start - confirm the target tolerates vector stores that
// are not 16-byte aligned.
341 uint8_t *dst = (cell_out_ + y * cell_stride_) + 1;
342 uint32_t *pixels = static_cast<uint32_t *>(ps_context_->data);
// ps_context_->stride is divided by sizeof(uint32_t) to index whole pixels.
// NOTE(review): pixel_line starts at pixel 0 of the row while x starts at 1,
// so the color of cell x is written to pixel x - 1 - confirm this one-pixel
// shift is intended.
343 uint32_t *pixel_line = // static_cast<uint32_t*>
344 (pixels + y * ps_context_->stride / sizeof(uint32_t));
345 int32_t x = 1;
347 if (simd_) {
348 const u8x16_t kOne = broadcast(1);
349 const u8x16_t kFour = broadcast(4);
350 const u8x16_t kEight = broadcast(8);
351 const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
353 // Prime the src
// Two consecutive 16-byte vectors are loaded per row (bytes 0..15 and
// 16..31). NOTE(review): these loads happen even when the loop below never
// runs, and for the last simulated scanline the second vector of src2 can
// extend past stride * height - confirm the cell buffers are large enough.
354 u8x16_t src00 = *reinterpret_cast<u8x16_t*>(&src0[0]);
355 u8x16_t src01 = *reinterpret_cast<u8x16_t*>(&src0[16]);
356 u8x16_t src10 = *reinterpret_cast<u8x16_t*>(&src1[0]);
357 u8x16_t src11 = *reinterpret_cast<u8x16_t*>(&src1[16]);
358 u8x16_t src20 = *reinterpret_cast<u8x16_t*>(&src2[0]);
359 u8x16_t src21 = *reinterpret_cast<u8x16_t*>(&src2[16]);
361 // This inner loop is SIMD - each loop iteration will process 16 cells.
362 for (; (x + 15) < (ps_context_->width - 1); x += 16) {
364 // Construct jittered source temps, using __builtin_shufflevector(..) to
365 // extract a shifted 16 element vector from the 32 element concatenation
366 // of two source vectors.
// The j0/j1/j2 suffix is the shift amount: j0 is the left column of each
// 3x3 window, j1 the center column and j2 the right column.
367 u8x16_t src0j0 = src00;
368 u8x16_t src0j1 = __builtin_shufflevector(src00, src01,
369 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
370 u8x16_t src0j2 = __builtin_shufflevector(src00, src01,
371 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
372 u8x16_t src1j0 = src10;
373 u8x16_t src1j1 = __builtin_shufflevector(src10, src11,
374 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
375 u8x16_t src1j2 = __builtin_shufflevector(src10, src11,
376 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
377 u8x16_t src2j0 = src20;
378 u8x16_t src2j1 = __builtin_shufflevector(src20, src21,
379 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
380 u8x16_t src2j2 = __builtin_shufflevector(src20, src21,
381 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
383 // Sum the jittered sources to construct neighbor count.
// The doubled "+ +" is a unary plus marking the slot of the center cell
// (src1j1), which is deliberately left out of the neighbor sum.
384 u8x16_t count = src0j0 + src0j1 + src0j2 +
385 src1j0 + + src1j2 +
386 src2j0 + src2j1 + src2j2;
387 // Add the center cell.
// Doubling the neighbor sum first makes the result 2 * neighbors + center.
388 count = count + count + src1j1;
389 // If count > 4 and < 8, center cell will be alive in the next frame.
390 u8x16_t alive1 = count > kFour;
391 u8x16_t alive2 = count < kEight;
392 // Intersect the two comparisons from above.
393 u8x16_t alive = alive1 & alive2;
395 // At this point, alive[x] will be one of two values:
396 // 0x00 for a dead cell
397 // 0xFF for an alive cell.
399 // Next, convert alive cells to green pixel color.
400 // Use __builtin_shufflevector(..) to construct output pixels from
401 // concatenation of alive vector and kZero255 const vector.
402 // Indices 0..15 select the 16 cells from alive vector.
403 // Index 16 is zero constant from kZero255 constant vector.
404 // Index 17 is 255 constant from kZero255 constant vector.
405 // Output pixel color values are in BGRABGRABGRABGRA order.
406 // Since each pixel needs 4 bytes of color information, 16 cells will
407 // need to expand to 4 separate 16 byte pixel splats.
408 u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255,
409 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
410 u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255,
411 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
412 u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255,
413 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
414 u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255,
415 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
417 // Write 16 pixels to output pixel buffer.
// pixel_line is a uint32_t pointer, so + 4 advances four pixels (16 bytes).
418 *reinterpret_cast<u8x16_t*>(pixel_line + 0) = pixel0_3;
419 *reinterpret_cast<u8x16_t*>(pixel_line + 4) = pixel4_7;
420 *reinterpret_cast<u8x16_t*>(pixel_line + 8) = pixel8_11;
421 *reinterpret_cast<u8x16_t*>(pixel_line + 12) = pixel12_15;
423 // Convert alive mask to 1 or 0 and store in destination cell array.
424 *reinterpret_cast<u8x16_t*>(dst) = alive & kOne;
426 // Increment pointers.
427 pixel_line += 16;
428 dst += 16;
429 src0 += 16;
430 src1 += 16;
431 src2 += 16;
433 // Shift source over by 16 cells and read the next 16 cells.
// The former "next" vector becomes the current one, and a new "next" vector
// is loaded from 16 bytes beyond the advanced pointers. This reload also
// runs after the final iteration, when its result is no longer used.
434 src00 = src01;
435 src01 = *reinterpret_cast<u8x16_t*>(&src0[16]);
436 src10 = src11;
437 src11 = *reinterpret_cast<u8x16_t*>(&src1[16]);
438 src20 = src21;
439 src21 = *reinterpret_cast<u8x16_t*>(&src2[16]);
443 // The SIMD loop above does 16 cells at a time. The loop below is the
444 // regular version which processes one cell at a time. It is used to
445 // finish the remainder of the scanline not handled by the SIMD loop.
446 for (; x < (ps_context_->width - 1); ++x) {
447 // Sum the jittered sources to construct neighbor count.
// As in the SIMD path, "+ +" marks the skipped center cell src1[1].
448 int count = src0[0] + src0[1] + src0[2] +
449 src1[0] + + src1[2] +
450 src2[0] + src2[1] + src2[2];
451 // Add the center cell.
452 count = count + count + src1[1];
453 // Use table lookup indexed by count to determine pixel & alive state.
454 uint32_t color = kNeighborColors[count];
455 *pixel_line++ = color;
456 *dst++ = kIsAlive[count];
457 ++src0;
458 ++src1;
459 ++src2;
463 // Static entry point for worker thread.
464 void Life::wSimulateEntry(int slice, void* thiz) {
465 static_cast<Life*>(thiz)->wSimulate(slice);
468 void Life::Simulate() {
469 // Stir up the edges to prevent the simulation from reaching steady state.
470 Stir();
472 if (multithread_) {
473 // If multi-threading enabled, dispatch tasks to pool of worker threads.
474 workers_->Dispatch(ps_context_->height, wSimulateEntry, this);
475 } else {
476 // Else manually simulate each line on this thread.
477 for (int y = 0; y < ps_context_->height; y++) {
478 wSimulateEntry(y, this);
481 std::swap(cell_in_, cell_out_);
484 void Life::Update() {
486 PSContext2DGetBuffer(ps_context_);
487 if (NULL == ps_context_->data)
488 return;
490 // If we somehow have not allocated these pointers yet, skip this frame.
491 if (!cell_in_ || !cell_out_) return;
493 // Simulate one (or more if benchmarking) frames
494 do {
495 Simulate();
496 if (!benchmarking_)
497 break;
498 --benchmark_frame_counter_;
499 } while(benchmark_frame_counter_ > 0);
500 if (benchmarking_)
501 EndBenchmark();
503 PSContext2DSwapBuffer(ps_context_);
506 // Starting point for the module. We do not use main since it would
507 // collide with main in libppapi_cpp.
508 int example_main(int argc, char* argv[]) {
509 Life life;
510 while (true) {
511 PSEvent* ps_event;
512 // Consume all available events
513 while ((ps_event = PSEventTryAcquire()) != NULL) {
514 life.HandleEvent(ps_event);
515 PSEventRelease(ps_event);
517 // Do simulation, render and present.
518 life.Update();
520 return 0;
523 // Register the function to call once the Instance Object is initialized.
524 // see: ppapi_simple/ps_main.h
525 PPAPI_SIMPLE_REGISTER_MAIN(example_main);