1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
14 #include <ppapi/c/ppb_input_event.h>
15 #include <ppapi/cpp/fullscreen.h>
16 #include <ppapi/cpp/input_event.h>
17 #include <ppapi/cpp/instance_handle.h>
18 #include <ppapi/cpp/var.h>
19 #include <ppapi/cpp/var_array.h>
20 #include <ppapi/cpp/var_array_buffer.h>
21 #include <ppapi/cpp/var_dictionary.h>
23 #include "ppapi_simple/ps.h"
24 #include "ppapi_simple/ps_context_2d.h"
25 #include "ppapi_simple/ps_event.h"
26 #include "ppapi_simple/ps_instance.h"
27 #include "ppapi_simple/ps_interface.h"
28 #include "ppapi_simple/ps_main.h"
29 #include "sdk_util/macros.h"
30 #include "sdk_util/thread_pool.h"
32 using namespace sdk_util
; // For sdk_util::ThreadPool
36 #define INLINE inline __attribute__((always_inline))
// BGRA helper macro, for constructing a pixel for a BGRA buffer.
// Each argument is one 8-bit channel value. Channels are widened to uint32_t
// before shifting so that an alpha of 0x80 or above never shifts a set bit
// into the sign bit of a plain int.
#define MakeBGRA(b, g, r, a)                                              \
  ((static_cast<uint32_t>(a) << 24) | (static_cast<uint32_t>(r) << 16) | \
   (static_cast<uint32_t>(g) << 8) | static_cast<uint32_t>(b))
// Number of frames in one benchmark run; StartBenchmark() loads this value
// into benchmark_frame_counter_.
42 const int kFramesToBenchmark
= 100;
// Alignment passed to posix_memalign() for the cell buffers, and the multiple
// to which cell_stride_ is rounded up in UpdateContext().
43 const int kCellAlignment
= 0x10;
45 // 128 bit vector types
// u8x16_t: sixteen uint8_t lanes (compiler vector_size extension).
46 typedef uint8_t u8x16_t
__attribute__ ((vector_size (16)));
48 // Helper function to broadcast x across 16 element vector.
49 INLINE u8x16_t
broadcast(uint8_t x
) {
50 u8x16_t r
= {x
, x
, x
, x
, x
, x
, x
, x
, x
, x
, x
, x
, x
, x
, x
, x
};
54 // Convert a count value into a live (green) or dead color value.
// The table is indexed by the count computed in Life::wSimulate():
// 2 * (sum of the eight neighbors) + center cell. Indices 5, 6 and 7 hold
// green with alpha 0xFF; every other index holds black with alpha 0xFF.
55 const uint32_t kNeighborColors
[] = {
56 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
57 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
58 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
59 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
60 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
61 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
62 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
63 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
64 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
65 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
66 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
67 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
68 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
69 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
70 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
71 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
72 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
73 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
76 // These represent the new health value of a cell based on its neighboring
77 // values. The health is binary: either alive or dead.
// Indexed by the count computed in Life::wSimulate():
// 2 * (sum of the eight neighbors) + center cell. Only counts 5, 6 and 7
// produce a live cell (two neighbors and alive, or three neighbors).
78 const uint8_t kIsAlive
[] = {
79 0, 0, 0, 0, 0, 1, 1, 1, 0,
80 0, 0, 0, 0, 0, 0, 0, 0, 0
83 // Timer helper for benchmarking. Returns seconds elapsed since program start,
86 int start_tv_retv
= gettimeofday(&start_tv
, NULL
);
88 inline double getseconds() {
89 const double usec_to_sec
= 0.000001;
91 if ((0 == start_tv_retv
) && (0 == gettimeofday(&tv
, NULL
)))
92 return (tv
.tv_sec
- start_tv
.tv_sec
) + tv
.tv_usec
* usec_to_sec
;
102 // Runs a tick of the simulations, update 2D output.
104 // Handle event from user, or message from JS.
105 void HandleEvent(PSEvent
* ps_event
);
107 void UpdateContext();
108 void DrawCell(int32_t x
, int32_t y
);
109 void ProcessTouchEvent(const pp::TouchInputEvent
& touches
);
110 void PostUpdateMessage(const char* message
, double value
);
111 void StartBenchmark();
114 void wSimulate(int y
);
115 static void wSimulateEntry(int y
, void* data
);
121 int benchmark_frame_counter_
;
122 double bench_start_time_
;
123 double bench_end_time_
;
126 int32_t cell_stride_
;
129 PSContext2D_t
* ps_context_
;
130 ThreadPool
* workers_
;
136 benchmarking_(false),
137 benchmark_frame_counter_(0),
138 bench_start_time_(0.0),
139 bench_end_time_(0.0),
145 ps_context_
= PSContext2DAllocate(PP_IMAGEDATAFORMAT_BGRA_PREMUL
);
146 // Query system for number of processors via sysconf()
147 int num_threads
= sysconf(_SC_NPROCESSORS_ONLN
);
150 workers_
= new ThreadPool(num_threads
);
151 PSEventSetFilter(PSE_ALL
);
156 PSContext2DFree(ps_context_
);
159 void Life::UpdateContext() {
160 cell_stride_
= (ps_context_
->width
+ kCellAlignment
- 1) &
161 ~(kCellAlignment
- 1);
162 size_t size
= cell_stride_
* ps_context_
->height
;
164 if (ps_context_
->width
!= width_
|| ps_context_
->height
!= height_
) {
168 // Create a new context
169 void* in_buffer
= NULL
;
170 void* out_buffer
= NULL
;
171 // alloc buffers aligned on 16 bytes
172 posix_memalign(&in_buffer
, kCellAlignment
, size
);
173 posix_memalign(&out_buffer
, kCellAlignment
, size
);
174 cell_in_
= (uint8_t*) in_buffer
;
175 cell_out_
= (uint8_t*) out_buffer
;
177 memset(cell_out_
, 0, size
);
178 for (size_t index
= 0; index
< size
; index
++) {
179 cell_in_
[index
] = rand() & 1;
181 width_
= ps_context_
->width
;
182 height_
= ps_context_
->height
;
186 void Life::DrawCell(int32_t x
, int32_t y
) {
187 if (!cell_in_
) return;
188 if (x
> 0 && x
< ps_context_
->width
- 1 &&
189 y
> 0 && y
< ps_context_
->height
- 1) {
190 cell_in_
[x
- 1 + y
* cell_stride_
] = 1;
191 cell_in_
[x
+ 1 + y
* cell_stride_
] = 1;
192 cell_in_
[x
+ (y
- 1) * cell_stride_
] = 1;
193 cell_in_
[x
+ (y
+ 1) * cell_stride_
] = 1;
197 void Life::ProcessTouchEvent(const pp::TouchInputEvent
& touches
) {
198 uint32_t count
= touches
.GetTouchCount(PP_TOUCHLIST_TYPE_TOUCHES
);
200 for (i
= 0; i
< count
; i
++) {
201 pp::TouchPoint touch
=
202 touches
.GetTouchByIndex(PP_TOUCHLIST_TYPE_TOUCHES
, i
);
203 int radius
= (int)(touch
.radii().x());
204 int x
= (int)(touch
.position().x());
205 int y
= (int)(touch
.position().y());
206 // num = 1/100th the area of touch point
207 uint32_t num
= (uint32_t)(M_PI
* radius
* radius
/ 100.0f
);
208 for (j
= 0; j
< num
; j
++) {
209 int dx
= rand() % (radius
* 2) - radius
;
210 int dy
= rand() % (radius
* 2) - radius
;
211 // only plot random cells within the touch area
212 if (dx
* dx
+ dy
* dy
<= radius
* radius
)
213 DrawCell(x
+ dx
, y
+ dy
);
218 void Life::PostUpdateMessage(const char* message_name
, double value
) {
219 pp::VarDictionary message
;
220 message
.Set("message", message_name
);
221 message
.Set("value", value
);
222 PSInterfaceMessaging()->PostMessage(PSGetInstanceId(), message
.pp_var());
225 void Life::StartBenchmark() {
226 printf("Running benchmark... (SIMD: %s, multi-threading: %s, size: %dx%d)\n",
227 simd_
? "enabled" : "disabled",
228 multithread_
? "enabled" : "disabled",
230 ps_context_
->height
);
231 benchmarking_
= true;
232 bench_start_time_
= getseconds();
233 benchmark_frame_counter_
= kFramesToBenchmark
;
236 void Life::EndBenchmark() {
238 bench_end_time_
= getseconds();
239 benchmarking_
= false;
240 total_time
= bench_end_time_
- bench_start_time_
;
241 printf("Finished - benchmark took %f seconds\n", total_time
);
242 // Send benchmark result to JS.
243 PostUpdateMessage("benchmark_result", total_time
);
// Dispatches one Pepper Simple event. The event is first offered to the 2D
// context; the switch on ps_event->type then handles input events (mouse,
// touch, key) and dictionary messages.
// NOTE(review): several statements of this function are not visible in this
// excerpt (e.g. what runs when PSContext2DHandleEvent() returns non-zero and
// what "run_benchmark" triggers) -- confirm against the full file.
246 void Life::HandleEvent(PSEvent
* ps_event
) {
247 // Give the 2D context a chance to process the event.
248 if (0 != PSContext2DHandleEvent(ps_context_
, ps_event
)) {
253 switch(ps_event
->type
) {
255 case PSE_INSTANCE_HANDLEINPUT
: {
// Wrap the raw resource in the C++ input event type to query its kind.
256 pp::InputEvent
event(ps_event
->as_resource
);
258 switch(event
.GetType()) {
259 case PP_INPUTEVENT_TYPE_MOUSEDOWN
:
260 case PP_INPUTEVENT_TYPE_MOUSEMOVE
: {
261 pp::MouseInputEvent mouse
= pp::MouseInputEvent(event
);
262 // If the button is down, draw
263 if (mouse
.GetModifiers() & PP_INPUTEVENT_MODIFIER_LEFTBUTTONDOWN
) {
264 PP_Point location
= mouse
.GetPosition();
265 DrawCell(location
.x
, location
.y
);
270 case PP_INPUTEVENT_TYPE_TOUCHSTART
:
271 case PP_INPUTEVENT_TYPE_TOUCHMOVE
: {
272 pp::TouchInputEvent touches
= pp::TouchInputEvent(event
);
273 ProcessTouchEvent(touches
);
277 case PP_INPUTEVENT_TYPE_KEYDOWN
: {
// A key-down event toggles fullscreen: request the opposite of the
// current state.
278 pp::Fullscreen
fullscreen((pp::InstanceHandle(PSGetInstanceId())));
279 bool isFullscreen
= fullscreen
.IsFullscreen();
280 fullscreen
.SetFullscreen(!isFullscreen
);
287 break; // case PSE_INSTANCE_HANDLEINPUT
290 case PSE_INSTANCE_HANDLEMESSAGE
: {
291 // Convert Pepper Simple message to PPAPI C++ vars
292 pp::Var
var(ps_event
->as_var
);
// Only dictionary messages are inspected; the "message" entry selects the
// action.
293 if (var
.is_dictionary()) {
294 pp::VarDictionary
dictionary(var
);
295 std::string message
= dictionary
.Get("message").AsString();
// "run_benchmark" is only honored while no benchmark is in progress.
296 if (message
== "run_benchmark" && !benchmarking_
) {
// "set_simd" / "set_threading" store the boolean "value" entry into the
// corresponding flag.
298 } else if (message
== "set_simd") {
299 simd_
= dictionary
.Get("value").AsBool();
300 } else if (message
== "set_threading") {
301 multithread_
= dictionary
.Get("value").AsBool();
304 break; // case PSE_INSTANCE_HANDLEMESSAGE
313 int32_t width
= ps_context_
->width
;
314 int32_t height
= ps_context_
->height
;
315 int32_t stride
= cell_stride_
;
317 if (cell_in_
== NULL
|| cell_out_
== NULL
)
320 for (i
= 0; i
< width
; ++i
) {
321 cell_in_
[i
] = rand() & 1;
322 cell_in_
[i
+ (height
- 1) * stride
] = rand() & 1;
324 for (i
= 0; i
< height
; ++i
) {
325 cell_in_
[i
* stride
] = rand() & 1;
326 cell_in_
[i
* stride
+ (width
- 1)] = rand() & 1;
// Simulates scanline y. For each cell it computes
// count = 2 * (sum of the eight neighbors) + center cell, writes the cell's
// next state into cell_out_ and its color into the context's pixel buffer.
// A vector loop handles 16 cells per iteration; a scalar loop finishes the
// remainder of the row using the kNeighborColors / kIsAlive tables.
// NOTE(review): the declaration of x and the per-iteration pointer advances
// are not visible in this excerpt -- confirm against the full file.
330 void Life::wSimulate(int y
) {
331 // Don't run simulation on top and bottom borders
332 if (y
< 1 || y
>= ps_context_
->height
- 1)
335 // Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
336 // wide perimeter is excluded from the simulation update; only cells from
337 // x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
// src0/src1/src2 point at the start of the rows above, at and below y in the
// input buffer; dst points at column 1 of row y in the output buffer.
338 uint8_t *src0
= (cell_in_
+ (y
- 1) * cell_stride_
);
339 uint8_t *src1
= src0
+ cell_stride_
;
340 uint8_t *src2
= src1
+ cell_stride_
;
341 uint8_t *dst
= (cell_out_
+ y
* cell_stride_
) + 1;
// ps_context_->stride is divided by sizeof(uint32_t) to step in whole pixels.
342 uint32_t *pixels
= static_cast<uint32_t *>(ps_context_
->data
);
343 uint32_t *pixel_line
= // static_cast<uint32_t*>
344 (pixels
+ y
* ps_context_
->stride
/ sizeof(uint32_t));
348 const u8x16_t kOne
= broadcast(1);
349 const u8x16_t kFour
= broadcast(4);
350 const u8x16_t kEight
= broadcast(8);
// Lane 0 is 0 and lane 1 is 255; these are selected as shuffle indices 16 and
// 17 when building pixels below.
351 const u8x16_t kZero255
= {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Load the first 32 cells of each of the three source rows.
// NOTE(review): these loads read 32 bytes per row regardless of the row
// width; for the last buffer row this may read past the allocation when
// cell_stride_ is 16 -- confirm the buffers have enough slack.
354 u8x16_t src00
= *reinterpret_cast<u8x16_t
*>(&src0
[0]);
355 u8x16_t src01
= *reinterpret_cast<u8x16_t
*>(&src0
[16]);
356 u8x16_t src10
= *reinterpret_cast<u8x16_t
*>(&src1
[0]);
357 u8x16_t src11
= *reinterpret_cast<u8x16_t
*>(&src1
[16]);
358 u8x16_t src20
= *reinterpret_cast<u8x16_t
*>(&src2
[0]);
359 u8x16_t src21
= *reinterpret_cast<u8x16_t
*>(&src2
[16]);
361 // This inner loop is SIMD - each loop iteration will process 16 cells.
362 for (; (x
+ 15) < (ps_context_
->width
- 1); x
+= 16) {
364 // Construct jittered source temps, using __builtin_shufflevector(..) to
365 // extract a shifted 16 element vector from the 32 element concatenation
366 // of two source vectors.
367 u8x16_t src0j0
= src00
;
368 u8x16_t src0j1
= __builtin_shufflevector(src00
, src01
,
369 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
370 u8x16_t src0j2
= __builtin_shufflevector(src00
, src01
,
371 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
372 u8x16_t src1j0
= src10
;
373 u8x16_t src1j1
= __builtin_shufflevector(src10
, src11
,
374 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
375 u8x16_t src1j2
= __builtin_shufflevector(src10
, src11
,
376 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
377 u8x16_t src2j0
= src20
;
378 u8x16_t src2j1
= __builtin_shufflevector(src20
, src21
,
379 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
380 u8x16_t src2j2
= __builtin_shufflevector(src20
, src21
,
381 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
383 // Sum the jittered sources to construct neighbor count.
384 u8x16_t count
= src0j0
+ src0j1
+ src0j2
+
386 src2j0
+ src2j1
+ src2j2
;
387 // Add the center cell.
// Doubling the neighbor sum before adding the center cell gives one value
// that encodes both the neighbor count and the cell's own state.
388 count
= count
+ count
+ src1j1
;
389 // If count > 4 and < 8, center cell will be alive in the next frame.
390 u8x16_t alive1
= count
> kFour
;
391 u8x16_t alive2
= count
< kEight
;
392 // Intersect the two comparisons from above.
393 u8x16_t alive
= alive1
& alive2
;
395 // At this point, alive[x] will be one of two values:
396 // 0x00 for a dead cell
397 // 0xFF for an alive cell.
399 // Next, convert alive cells to green pixel color.
400 // Use __builtin_shufflevector(..) to construct output pixels from
401 // concatenation of alive vector and kZero255 const vector.
402 // Indices 0..15 select the 16 cells from alive vector.
403 // Index 16 is zero constant from kZero255 constant vector.
404 // Index 17 is 255 constant from kZero255 constant vector.
405 // Output pixel color values are in BGRABGRABGRABGRA order.
406 // Since each pixel needs 4 bytes of color information, 16 cells will
407 // need to expand to 4 separate 16 byte pixel splats.
408 u8x16_t pixel0_3
= __builtin_shufflevector(alive
, kZero255
,
409 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
410 u8x16_t pixel4_7
= __builtin_shufflevector(alive
, kZero255
,
411 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
412 u8x16_t pixel8_11
= __builtin_shufflevector(alive
, kZero255
,
413 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
414 u8x16_t pixel12_15
= __builtin_shufflevector(alive
, kZero255
,
415 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
417 // Write 16 pixels to output pixel buffer.
418 *reinterpret_cast<u8x16_t
*>(pixel_line
+ 0) = pixel0_3
;
419 *reinterpret_cast<u8x16_t
*>(pixel_line
+ 4) = pixel4_7
;
420 *reinterpret_cast<u8x16_t
*>(pixel_line
+ 8) = pixel8_11
;
421 *reinterpret_cast<u8x16_t
*>(pixel_line
+ 12) = pixel12_15
;
423 // Convert alive mask to 1 or 0 and store in destination cell array.
// NOTE(review): dst starts one byte past the row start, so this 16-byte
// store is presumably not 16-byte aligned -- confirm the target tolerates
// unaligned vector stores through a u8x16_t pointer.
424 *reinterpret_cast<u8x16_t
*>(dst
) = alive
& kOne
;
426 // Increment pointers.
433 // Shift source over by 16 cells and read the next 16 cells.
435 src01
= *reinterpret_cast<u8x16_t
*>(&src0
[16]);
437 src11
= *reinterpret_cast<u8x16_t
*>(&src1
[16]);
439 src21
= *reinterpret_cast<u8x16_t
*>(&src2
[16]);
443 // The SIMD loop above does 16 cells at a time. The loop below is the
444 // regular version which processes one cell at a time. It is used to
445 // finish the remainder of the scanline not handled by the SIMD loop.
446 for (; x
< (ps_context_
->width
- 1); ++x
) {
447 // Sum the jittered sources to construct neighbor count.
448 int count
= src0
[0] + src0
[1] + src0
[2] +
449 src1
[0] + + src1
[2] +
450 src2
[0] + src2
[1] + src2
[2];
451 // Add the center cell.
452 count
= count
+ count
+ src1
[1];
453 // Use table lookup indexed by count to determine pixel & alive state.
454 uint32_t color
= kNeighborColors
[count
];
455 *pixel_line
++ = color
;
456 *dst
++ = kIsAlive
[count
];
463 // Static entry point for worker thread.
464 void Life::wSimulateEntry(int slice
, void* thiz
) {
465 static_cast<Life
*>(thiz
)->wSimulate(slice
);
// Runs one generation over every scanline 0..height-1 through
// wSimulateEntry(), either via the worker pool or in a loop on the calling
// thread, then swaps the input and output cell buffers so the new generation
// becomes the next input.
// NOTE(review): the edge-stirring call and the condition selecting between
// the two paths are not visible in this excerpt.
468 void Life::Simulate() {
469 // Stir up the edges to prevent the simulation from reaching steady state.
473 // If multi-threading enabled, dispatch tasks to pool of worker threads.
474 workers_
->Dispatch(ps_context_
->height
, wSimulateEntry
, this);
476 // Else manually simulate each line on this thread.
477 for (int y
= 0; y
< ps_context_
->height
; y
++) {
478 wSimulateEntry(y
, this);
481 std::swap(cell_in_
, cell_out_
);
// Produces one frame: acquires the 2D context's pixel buffer, runs the
// simulation loop (repeating while benchmark_frame_counter_ is above zero)
// and swaps the context buffer.
// NOTE(review): the loop body and the statements guarded by the NULL-data
// check are not visible in this excerpt.
484 void Life::Update() {
486 PSContext2DGetBuffer(ps_context_
);
// No pixel buffer available for this frame.
487 if (NULL
== ps_context_
->data
)
490 // If we somehow have not allocated these pointers yet, skip this frame.
491 if (!cell_in_
|| !cell_out_
) return;
493 // Simulate one (or more if benchmarking) frames
498 --benchmark_frame_counter_
;
499 } while(benchmark_frame_counter_
> 0);
503 PSContext2DSwapBuffer(ps_context_
);
506 // Starting point for the module. We do not use main since it would
507 // collide with main in libppapi_cpp.
// Drains every pending Pepper Simple event into life.HandleEvent(), releasing
// each event after it has been handled.
// NOTE(review): the declarations of life/ps_event and the enclosing loop are
// not visible in this excerpt.
508 int example_main(int argc
, char* argv
[]) {
512 // Consume all available events
// PSEventTryAcquire() returning NULL ends the drain loop.
513 while ((ps_event
= PSEventTryAcquire()) != NULL
) {
514 life
.HandleEvent(ps_event
);
515 PSEventRelease(ps_event
);
517 // Do simulation, render and present.
523 // Register the function to call once the Instance Object is initialized.
524 // see: ppapi_simple/ps_main.h
525 PPAPI_SIMPLE_REGISTER_MAIN(example_main
);