1 /**************************************************************************
3 * Copyright 2010 VMware, Inc. All Rights Reserved.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 **************************************************************************/
29 * Code to convert images from tiled to linear and back.
30 * XXX there are quite a few assumptions about color and z/stencil being
35 #include "util/u_format.h"
36 #include "lp_tile_soa.h"
37 #include "lp_tile_image.h"
40 #define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4)
/**
 * Untile a 4x4 block of 32-bit words (all contiguous) to linear layout
 * at dst, with dst_stride words between rows.
 *
 * The 16 source words are consumed as four groups of four: src[0..3] land
 * in the top-left 2x2 corner of the output, src[4..7] in the top-right,
 * src[8..11] in the bottom-left and src[12..15] in the bottom-right.
 * Inside each group the order is top-left, top-right, bottom-left,
 * bottom-right.
 *
 * \param src         16 contiguous tiled words
 * \param dst         address of the top-left word of the linear 4x4 rect
 * \param dst_stride  distance between successive dst rows, in words
 *
 * NOTE(review): the return type / storage class and the declaration of
 * the first row pointer were not visible in the reviewed text; "static
 * void" and "d0 = dst" are reconstructed from the uses below -- confirm
 * against the complete file.
 */
static void
untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
{
   /* one pointer per destination row */
   uint32_t *d0 = dst;
   uint32_t *d1 = d0 + dst_stride;
   uint32_t *d2 = d1 + dst_stride;
   uint32_t *d3 = d2 + dst_stride;

   d0[0] = src[0];   d0[1] = src[1];   d0[2] = src[4];   d0[3] = src[5];
   d1[0] = src[2];   d1[1] = src[3];   d1[2] = src[6];   d1[3] = src[7];
   d2[0] = src[8];   d2[1] = src[9];   d2[2] = src[12];  d2[3] = src[13];
   d3[0] = src[10];  d3[1] = src[11];  d3[2] = src[14];  d3[3] = src[15];
}
/**
 * Untile a 4x4 block of 16-bit words (all contiguous) to linear layout
 * at dst, with dst_stride words between rows.
 *
 * The 16 source words are consumed as four groups of four: src[0..3] land
 * in the top-left 2x2 corner of the output, src[4..7] in the top-right,
 * src[8..11] in the bottom-left and src[12..15] in the bottom-right.
 * Inside each group the order is top-left, top-right, bottom-left,
 * bottom-right.
 *
 * \param src         16 contiguous tiled words
 * \param dst         address of the top-left word of the linear 4x4 rect
 * \param dst_stride  distance between successive dst rows, in words
 *
 * NOTE(review): the return type / storage class and the declaration of
 * the first row pointer were not visible in the reviewed text; "static
 * void" and "d0 = dst" are reconstructed from the uses below -- confirm
 * against the complete file.
 */
static void
untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride)
{
   /* one pointer per destination row */
   uint16_t *d0 = dst;
   uint16_t *d1 = d0 + dst_stride;
   uint16_t *d2 = d1 + dst_stride;
   uint16_t *d3 = d2 + dst_stride;

   d0[0] = src[0];   d0[1] = src[1];   d0[2] = src[4];   d0[3] = src[5];
   d1[0] = src[2];   d1[1] = src[3];   d1[2] = src[6];   d1[3] = src[7];
   d2[0] = src[8];   d2[1] = src[9];   d2[2] = src[12];  d2[3] = src[13];
   d3[0] = src[10];  d3[1] = src[11];  d3[2] = src[14];  d3[3] = src[15];
}
/**
 * Convert a 4x4 rect of 32-bit words from a linear layout into tiled
 * layout (in which all 16 words are contiguous).
 *
 * The output is written as four groups of four: dst[0..3] receive the
 * top-left 2x2 corner of the source rect, dst[4..7] the top-right,
 * dst[8..11] the bottom-left and dst[12..15] the bottom-right.  Inside
 * each group the order is top-left, top-right, bottom-left, bottom-right.
 *
 * \param src         address of the top-left word of the linear 4x4 rect
 * \param dst         receives 16 contiguous tiled words
 * \param src_stride  distance between successive src rows, in words
 *
 * NOTE(review): the return type / storage class was not visible in the
 * reviewed text; "static void" is reconstructed -- confirm against the
 * complete file.
 */
static void
tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
{
   /* one pointer per source row */
   const uint32_t *s0 = src;
   const uint32_t *s1 = s0 + src_stride;
   const uint32_t *s2 = s1 + src_stride;
   const uint32_t *s3 = s2 + src_stride;

   dst[0] = s0[0];   dst[1] = s0[1];   dst[4] = s0[2];   dst[5] = s0[3];
   dst[2] = s1[0];   dst[3] = s1[1];   dst[6] = s1[2];   dst[7] = s1[3];
   dst[8] = s2[0];   dst[9] = s2[1];   dst[12] = s2[2];  dst[13] = s2[3];
   dst[10] = s3[0];  dst[11] = s3[1];  dst[14] = s3[2];  dst[15] = s3[3];
}
/**
 * Convert a 4x4 rect of 16-bit words from a linear layout into tiled
 * layout (in which all 16 words are contiguous).
 *
 * The output is written as four groups of four: dst[0..3] receive the
 * top-left 2x2 corner of the source rect, dst[4..7] the top-right,
 * dst[8..11] the bottom-left and dst[12..15] the bottom-right.  Inside
 * each group the order is top-left, top-right, bottom-left, bottom-right.
 *
 * \param src         address of the top-left word of the linear 4x4 rect
 * \param dst         receives 16 contiguous tiled words
 * \param src_stride  distance between successive src rows, in words
 *
 * NOTE(review): the return type / storage class was not visible in the
 * reviewed text; "static void" is reconstructed -- confirm against the
 * complete file.
 */
static void
tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride)
{
   /* one pointer per source row */
   const uint16_t *s0 = src;
   const uint16_t *s1 = s0 + src_stride;
   const uint16_t *s2 = s1 + src_stride;
   const uint16_t *s3 = s2 + src_stride;

   dst[0] = s0[0];   dst[1] = s0[1];   dst[4] = s0[2];   dst[5] = s0[3];
   dst[2] = s1[0];   dst[3] = s1[1];   dst[6] = s1[2];   dst[7] = s1[3];
   dst[8] = s2[0];   dst[9] = s2[1];   dst[12] = s2[2];  dst[13] = s2[3];
   dst[10] = s3[0];  dst[11] = s3[1];  dst[14] = s3[2];  dst[15] = s3[3];
}
124 * Convert a tiled image into a linear image.
125 * \param dst_stride dest row stride in bytes
128 lp_tiled_to_linear(const void *src
, void *dst
,
129 unsigned x
, unsigned y
,
130 unsigned width
, unsigned height
,
131 enum pipe_format format
,
133 unsigned tiles_per_row
)
135 assert(x
% TILE_SIZE
== 0);
136 assert(y
% TILE_SIZE
== 0);
137 /*assert(width % TILE_SIZE == 0);
138 assert(height % TILE_SIZE == 0);*/
140 /* Note that Z/stencil surfaces use a different tiling size than
143 if (util_format_is_depth_or_stencil(format
)) {
144 const uint bpp
= util_format_get_blocksize(format
);
145 const uint src_stride
= dst_stride
* TILE_VECTOR_WIDTH
;
146 const uint tile_w
= TILE_VECTOR_WIDTH
, tile_h
= TILE_VECTOR_HEIGHT
;
147 const uint tiles_per_row
= src_stride
/ (tile_w
* tile_h
* bpp
);
149 dst_stride
/= bpp
; /* convert from bytes to words */
152 const uint32_t *src32
= (const uint32_t *) src
;
153 uint32_t *dst32
= (uint32_t *) dst
;
156 for (j
= 0; j
< height
; j
+= tile_h
) {
157 for (i
= 0; i
< width
; i
+= tile_w
) {
158 /* compute offsets in 32-bit words */
159 uint ii
= i
+ x
, jj
= j
+ y
;
160 uint src_offset
= (jj
/ tile_h
* tiles_per_row
+ ii
/ tile_w
)
162 uint dst_offset
= jj
* dst_stride
+ ii
;
163 untile_4_4_uint32(src32
+ src_offset
,
170 const uint16_t *src16
= (const uint16_t *) src
;
171 uint16_t *dst16
= (uint16_t *) dst
;
176 for (j
= 0; j
< height
; j
+= tile_h
) {
177 for (i
= 0; i
< width
; i
+= tile_w
) {
178 /* compute offsets in 16-bit words */
179 uint ii
= i
+ x
, jj
= j
+ y
;
180 uint src_offset
= (jj
/ tile_h
* tiles_per_row
+ ii
/ tile_w
)
182 uint dst_offset
= jj
* dst_stride
+ ii
;
183 untile_4_4_uint16(src16
+ src_offset
,
193 const uint tile_w
= TILE_SIZE
, tile_h
= TILE_SIZE
;
194 const uint bytes_per_tile
= tile_w
* tile_h
* bpp
;
197 for (j
= 0; j
< height
; j
+= tile_h
) {
198 for (i
= 0; i
< width
; i
+= tile_w
) {
199 uint ii
= i
+ x
, jj
= j
+ y
;
200 uint tile_offset
= ((jj
/ tile_h
) * tiles_per_row
+ ii
/ tile_w
);
201 uint byte_offset
= tile_offset
* bytes_per_tile
;
202 const uint8_t *src_tile
= (uint8_t *) src
+ byte_offset
;
204 lp_tile_unswizzle_4ub(format
,
207 ii
, jj
, tile_w
, tile_h
);
215 * Convert a linear image into a tiled image.
216 * \param src_stride source row stride in bytes
219 lp_linear_to_tiled(const void *src
, void *dst
,
220 unsigned x
, unsigned y
,
221 unsigned width
, unsigned height
,
222 enum pipe_format format
,
224 unsigned tiles_per_row
)
226 assert(x
% TILE_SIZE
== 0);
227 assert(y
% TILE_SIZE
== 0);
229 assert(width % TILE_SIZE == 0);
230 assert(height % TILE_SIZE == 0);
233 if (util_format_is_depth_or_stencil(format
)) {
234 const uint bpp
= util_format_get_blocksize(format
);
235 const uint dst_stride
= src_stride
* TILE_VECTOR_WIDTH
;
236 const uint tile_w
= TILE_VECTOR_WIDTH
, tile_h
= TILE_VECTOR_HEIGHT
;
237 const uint tiles_per_row
= dst_stride
/ (tile_w
* tile_h
* bpp
);
239 src_stride
/= bpp
; /* convert from bytes to words */
242 const uint32_t *src32
= (const uint32_t *) src
;
243 uint32_t *dst32
= (uint32_t *) dst
;
246 for (j
= 0; j
< height
; j
+= tile_h
) {
247 for (i
= 0; i
< width
; i
+= tile_w
) {
248 /* compute offsets in 32-bit words */
249 uint ii
= i
+ x
, jj
= j
+ y
;
250 uint src_offset
= jj
* src_stride
+ ii
;
251 uint dst_offset
= (jj
/ tile_h
* tiles_per_row
+ ii
/ tile_w
)
253 tile_4_4_uint32(src32
+ src_offset
,
260 const uint16_t *src16
= (const uint16_t *) src
;
261 uint16_t *dst16
= (uint16_t *) dst
;
266 for (j
= 0; j
< height
; j
+= tile_h
) {
267 for (i
= 0; i
< width
; i
+= tile_w
) {
268 /* compute offsets in 16-bit words */
269 uint ii
= i
+ x
, jj
= j
+ y
;
270 uint src_offset
= jj
* src_stride
+ ii
;
271 uint dst_offset
= (jj
/ tile_h
* tiles_per_row
+ ii
/ tile_w
)
273 tile_4_4_uint16(src16
+ src_offset
,
282 const uint tile_w
= TILE_SIZE
, tile_h
= TILE_SIZE
;
283 const uint bytes_per_tile
= tile_w
* tile_h
* bpp
;
286 for (j
= 0; j
< height
; j
+= TILE_SIZE
) {
287 for (i
= 0; i
< width
; i
+= TILE_SIZE
) {
288 uint ii
= i
+ x
, jj
= j
+ y
;
289 uint tile_offset
= ((jj
/ tile_h
) * tiles_per_row
+ ii
/ tile_w
);
290 uint byte_offset
= tile_offset
* bytes_per_tile
;
291 uint8_t *dst_tile
= (uint8_t *) dst
+ byte_offset
;
293 lp_tile_swizzle_4ub(format
,
296 ii
, jj
, tile_w
, tile_h
);
307 test_tiled_linear_conversion(void *data
,
308 enum pipe_format format
,
309 unsigned width
, unsigned height
,
313 unsigned wt
= (width
+ TILE_SIZE
- 1) / TILE_SIZE
;
314 unsigned ht
= (height
+ TILE_SIZE
- 1) / TILE_SIZE
;
316 uint8_t *tiled
= malloc(wt
* ht
* TILE_SIZE
* TILE_SIZE
* 4);
318 /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
320 lp_linear_to_tiled(data
, tiled
, 0, 0, width
, height
, format
,
323 lp_tiled_to_linear(tiled
, data
, 0, 0, width
, height
, format
,