2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
8 /** @file viewport_sprite_sorter_sse4.cpp Sprite sorter that uses SSE4.1. */
14 #include "smmintrin.h"
15 #include "viewport_sprite_sorter.h"
16 #include <forward_list>
20 #include "safeguards.h"
22 #ifdef POINTER_IS_64BIT
23 static_assert((sizeof(ParentSpriteToDraw
) % 16) == 0);
24 # define LOAD_128 _mm_load_si128
26 # define LOAD_128 _mm_loadu_si128
29 void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector
*psdv
)
31 if (psdv
->size() < 2) return;
33 const __m128i mask_ptest
= _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
35 /* We rely on sprites being, for the most part, already ordered.
36 * So we don't need to move many of them and can keep track of their
37 * order efficiently by using stack. We always move sprites to the front
38 * of the current position, i.e. to the top of the stack.
39 * Also use special constants to indicate sorting state without
40 * adding extra fields to ParentSpriteToDraw structure.
42 const uint32 ORDER_COMPARED
= UINT32_MAX
; // Sprite was compared but we still need to compare the ones preceding it
43 const uint32 ORDER_RETURNED
= UINT32_MAX
- 1; // Mark sorted sprite in case there are other occurrences of it in the stack
44 std::stack
<ParentSpriteToDraw
*> sprite_order
;
45 uint32 next_order
= 0;
47 std::forward_list
<std::pair
<int64
, ParentSpriteToDraw
*>> sprite_list
; // We store sprites in a list sorted by xmin+ymin
49 /* Initialize sprite list and order. */
50 for (auto p
= psdv
->rbegin(); p
!= psdv
->rend(); p
++) {
51 sprite_list
.push_front(std::make_pair((*p
)->xmin
+ (*p
)->ymin
, *p
));
52 sprite_order
.push(*p
);
53 (*p
)->order
= next_order
++;
58 std::vector
<ParentSpriteToDraw
*> preceding
; // Temporarily stores sprites that precede current and their position in the list
59 auto preceding_prev
= sprite_list
.begin(); // Store iterator in case we need to delete a single preciding sprite
60 auto out
= psdv
->begin(); // Iterator to output sorted sprites
62 while (!sprite_order
.empty()) {
64 auto s
= sprite_order
.top();
67 /* Sprite is already sorted, ignore it. */
68 if (s
->order
== ORDER_RETURNED
) continue;
70 /* Sprite was already compared, just need to output it. */
71 if (s
->order
== ORDER_COMPARED
) {
73 s
->order
= ORDER_RETURNED
;
79 /* We only need sprites with xmin <= s->xmax && ymin <= s->ymax && zmin <= s->zmax
80 * So by iterating sprites with xmin + ymin <= s->xmax + s->ymax
81 * we get all we need and some more that we filter out later.
82 * We don't include zmin into the sum as there are usually more neighbors on x and y than z
83 * so including it will actually increase the amount of false positives.
84 * Also min coordinates can be > max so using max(xmin, xmax) + max(ymin, ymax)
85 * to ensure that we iterate the current sprite as we need to remove it from the list.
87 auto ssum
= std::max(s
->xmax
, s
->xmin
) + std::max(s
->ymax
, s
->ymin
);
88 auto prev
= sprite_list
.before_begin();
89 auto x
= sprite_list
.begin();
90 while (x
!= sprite_list
.end() && ((*x
).first
<= ssum
)) {
93 /* We found the current sprite, remove it and move on. */
94 x
= sprite_list
.erase_after(prev
);
101 /* Check that p->xmin <= s->xmax && p->ymin <= s->ymax && p->zmin <= s->zmax */
102 __m128i s_max
= LOAD_128((__m128i
*) &s
->xmax
);
103 __m128i p_min
= LOAD_128((__m128i
*) &p
->xmin
);
104 __m128i r1
= _mm_cmplt_epi32(s_max
, p_min
);
105 if (!_mm_testz_si128(mask_ptest
, r1
))
108 /* Check if sprites overlap, i.e.
109 * s->xmin <= p->xmax && s->ymin <= p->ymax && s->zmin <= p->zmax
111 __m128i s_min
= LOAD_128((__m128i
*) &s
->xmin
);
112 __m128i p_max
= LOAD_128((__m128i
*) &p
->xmax
);
113 __m128i r2
= _mm_cmplt_epi32(p_max
, s_min
);
114 if (_mm_testz_si128(mask_ptest
, r2
)) {
115 /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of
116 * the screen and with higher Z elevation, are drawn in front.
117 * Here X,Y,Z are the coordinates of the "center of mass" of the sprite,
118 * i.e. X=(left+right)/2, etc.
119 * However, since we only care about order, don't actually divide / 2
121 if (s
->xmin
+ s
->xmax
+ s
->ymin
+ s
->ymax
+ s
->zmin
+ s
->zmax
<=
122 p
->xmin
+ p
->xmax
+ p
->ymin
+ p
->ymax
+ p
->zmin
+ p
->zmax
) {
127 preceding
.push_back(p
);
128 preceding_prev
= p_prev
;
131 if (preceding
.empty()) {
132 /* No preceding sprites, add current one to the output */
134 s
->order
= ORDER_RETURNED
;
138 /* Optimization for the case when we only have 1 sprite to move. */
139 if (preceding
.size() == 1) {
140 auto p
= preceding
[0];
141 /* We can only output the preceding sprite if there can't be any other sprites preceding it. */
142 if (p
->xmax
<= s
->xmax
&& p
->ymax
<= s
->ymax
&& p
->zmax
<= s
->zmax
) {
143 p
->order
= ORDER_RETURNED
;
144 s
->order
= ORDER_RETURNED
;
145 sprite_list
.erase_after(preceding_prev
);
152 /* Sort all preceding sprites by order and assign new orders in reverse (as original sorter did). */
153 std::sort(preceding
.begin(), preceding
.end(), [](const ParentSpriteToDraw
*a
, const ParentSpriteToDraw
*b
) {
154 return a
->order
> b
->order
;
157 s
->order
= ORDER_COMPARED
;
158 sprite_order
.push(s
); // Still need to output so push it back for now
160 for (auto p
: preceding
) {
161 p
->order
= next_order
++;
162 sprite_order
.push(p
);
169 * Check whether the current CPU supports SSE 4.1.
170 * @return True iff the CPU supports SSE 4.1.
172 bool ViewportSortParentSpritesSSE41Checker()
174 return HasCPUIDFlag(1, 2, 19);
177 #endif /* WITH_SSE */