3 * ATI Mach64 Hardware Acceleration
6 #include <linux/delay.h>
7 #include <asm/unaligned.h>
9 #include <video/mach64.h>
13 * Generic Mach64 routines
16 /* this is for DMA GUI engine! work in progress */
22 } BM_DESCRIPTOR_ENTRY
;
/*
 * Constants for the (work in progress) DMA GUI engine descriptors.
 * LAST_DESCRIPTOR is bit 31; it is built from an unsigned constant because
 * shifting a signed 1 into the sign bit is undefined behaviour.
 */
#define LAST_DESCRIPTOR		(1U << 31)
#define SYSTEM_TO_FRAME_BUFFER	0
27 static u32
rotation24bpp(u32 dx
, u32 direction
)
30 if (direction
& DST_X_LEFT_TO_RIGHT
) {
31 rotation
= (dx
/ 4) % 6;
33 rotation
= ((dx
+ 2) / 4) % 6;
36 return ((rotation
<< 8) | DST_24_ROTATION_ENABLE
);
39 void aty_reset_engine(const struct atyfb_par
*par
)
42 aty_st_le32(GEN_TEST_CNTL
,
43 aty_ld_le32(GEN_TEST_CNTL
, par
) &
44 ~(GUI_ENGINE_ENABLE
| HWCURSOR_ENABLE
), par
);
46 aty_st_le32(GEN_TEST_CNTL
,
47 aty_ld_le32(GEN_TEST_CNTL
, par
) | GUI_ENGINE_ENABLE
, par
);
48 /* ensure engine is not locked up by clearing any FIFO or */
51 aty_ld_le32(BUS_CNTL
, par
) | BUS_HOST_ERR_ACK
| BUS_FIFO_ERR_ACK
, par
);
54 static void reset_GTC_3D_engine(const struct atyfb_par
*par
)
56 aty_st_le32(SCALE_3D_CNTL
, 0xc0, par
);
57 mdelay(GTC_3D_RESET_DELAY
);
58 aty_st_le32(SETUP_CNTL
, 0x00, par
);
59 mdelay(GTC_3D_RESET_DELAY
);
60 aty_st_le32(SCALE_3D_CNTL
, 0x00, par
);
61 mdelay(GTC_3D_RESET_DELAY
);
64 void aty_init_engine(struct atyfb_par
*par
, struct fb_info
*info
)
69 /* determine modal information from global mode structure */
70 pitch_value
= info
->fix
.line_length
/ (info
->var
.bits_per_pixel
/ 8);
71 vxres
= info
->var
.xres_virtual
;
73 if (info
->var
.bits_per_pixel
== 24) {
74 /* In 24 bpp, the engine is in 8 bpp - this requires that all */
75 /* horizontal coordinates and widths must be adjusted */
80 /* On GTC (RagePro), we need to reset the 3D engine before */
81 if (M64_HAS(RESET_3D
))
82 reset_GTC_3D_engine(par
);
84 /* Reset engine, enable, and clear any engine errors */
85 aty_reset_engine(par
);
86 /* Ensure that vga page pointers are set to zero - the upper */
87 /* page pointers are set to 1 to handle overflows in the */
89 aty_st_le32(MEM_VGA_WP_SEL
, 0x00010000, par
);
90 aty_st_le32(MEM_VGA_RP_SEL
, 0x00010000, par
);
92 /* ---- Setup standard engine context ---- */
94 /* All GUI registers here are FIFOed - therefore, wait for */
95 /* the appropriate number of empty FIFO entries */
96 wait_for_fifo(14, par
);
98 /* enable all registers to be loaded for context loads */
99 aty_st_le32(CONTEXT_MASK
, 0xFFFFFFFF, par
);
101 /* set destination pitch to modal pitch, set offset to zero */
102 aty_st_le32(DST_OFF_PITCH
, (pitch_value
/ 8) << 22, par
);
104 /* zero these registers (set them to a known state) */
105 aty_st_le32(DST_Y_X
, 0, par
);
106 aty_st_le32(DST_HEIGHT
, 0, par
);
107 aty_st_le32(DST_BRES_ERR
, 0, par
);
108 aty_st_le32(DST_BRES_INC
, 0, par
);
109 aty_st_le32(DST_BRES_DEC
, 0, par
);
111 /* set destination drawing attributes */
112 aty_st_le32(DST_CNTL
, DST_LAST_PEL
| DST_Y_TOP_TO_BOTTOM
|
113 DST_X_LEFT_TO_RIGHT
, par
);
115 /* set source pitch to modal pitch, set offset to zero */
116 aty_st_le32(SRC_OFF_PITCH
, (pitch_value
/ 8) << 22, par
);
118 /* set these registers to a known state */
119 aty_st_le32(SRC_Y_X
, 0, par
);
120 aty_st_le32(SRC_HEIGHT1_WIDTH1
, 1, par
);
121 aty_st_le32(SRC_Y_X_START
, 0, par
);
122 aty_st_le32(SRC_HEIGHT2_WIDTH2
, 1, par
);
124 /* set source pixel retrieving attributes */
125 aty_st_le32(SRC_CNTL
, SRC_LINE_X_LEFT_TO_RIGHT
, par
);
127 /* set host attributes */
128 wait_for_fifo(13, par
);
129 aty_st_le32(HOST_CNTL
, 0, par
);
131 /* set pattern attributes */
132 aty_st_le32(PAT_REG0
, 0, par
);
133 aty_st_le32(PAT_REG1
, 0, par
);
134 aty_st_le32(PAT_CNTL
, 0, par
);
136 /* set scissors to modal size */
137 aty_st_le32(SC_LEFT
, 0, par
);
138 aty_st_le32(SC_TOP
, 0, par
);
139 aty_st_le32(SC_BOTTOM
, par
->crtc
.vyres
- 1, par
);
140 aty_st_le32(SC_RIGHT
, vxres
- 1, par
);
142 /* set background color to minimum value (usually BLACK) */
143 aty_st_le32(DP_BKGD_CLR
, 0, par
);
145 /* set foreground color to maximum value (usually WHITE) */
146 aty_st_le32(DP_FRGD_CLR
, 0xFFFFFFFF, par
);
148 /* set write mask to effect all pixel bits */
149 aty_st_le32(DP_WRITE_MASK
, 0xFFFFFFFF, par
);
151 /* set foreground mix to overpaint and background mix to */
153 aty_st_le32(DP_MIX
, FRGD_MIX_S
| BKGD_MIX_D
, par
);
155 /* set primary source pixel channel to foreground color */
157 aty_st_le32(DP_SRC
, FRGD_SRC_FRGD_CLR
, par
);
159 /* set compare functionality to false (no-effect on */
161 wait_for_fifo(3, par
);
162 aty_st_le32(CLR_CMP_CLR
, 0, par
);
163 aty_st_le32(CLR_CMP_MASK
, 0xFFFFFFFF, par
);
164 aty_st_le32(CLR_CMP_CNTL
, 0, par
);
166 /* set pixel depth */
167 wait_for_fifo(2, par
);
168 aty_st_le32(DP_PIX_WIDTH
, par
->crtc
.dp_pix_width
, par
);
169 aty_st_le32(DP_CHAIN_MASK
, par
->crtc
.dp_chain_mask
, par
);
171 wait_for_fifo(5, par
);
172 aty_st_le32(SCALE_3D_CNTL
, 0, par
);
173 aty_st_le32(Z_CNTL
, 0, par
);
174 aty_st_le32(CRTC_INT_CNTL
, aty_ld_le32(CRTC_INT_CNTL
, par
) & ~0x20,
176 aty_st_le32(GUI_TRAJ_CNTL
, 0x100023, par
);
178 /* insure engine is idle before leaving */
183 * Accelerated functions
186 static inline void draw_rect(s16 x
, s16 y
, u16 width
, u16 height
,
187 struct atyfb_par
*par
)
189 /* perform rectangle fill */
190 wait_for_fifo(2, par
);
191 aty_st_le32(DST_Y_X
, (x
<< 16) | y
, par
);
192 aty_st_le32(DST_HEIGHT_WIDTH
, (width
<< 16) | height
, par
);
193 par
->blitter_may_be_busy
= 1;
196 void atyfb_copyarea(struct fb_info
*info
, const struct fb_copyarea
*area
)
198 struct atyfb_par
*par
= (struct atyfb_par
*) info
->par
;
199 u32 dy
= area
->dy
, sy
= area
->sy
, direction
= DST_LAST_PEL
;
200 u32 sx
= area
->sx
, dx
= area
->dx
, width
= area
->width
, rotation
= 0;
204 if (!area
->width
|| !area
->height
)
206 if (!par
->accel_flags
) {
207 cfb_copyarea(info
, area
);
211 if (info
->var
.bits_per_pixel
== 24) {
212 /* In 24 bpp, the engine is in 8 bpp - this requires that all */
213 /* horizontal coordinates and widths must be adjusted */
219 if (area
->sy
< area
->dy
) {
220 dy
+= area
->height
- 1;
221 sy
+= area
->height
- 1;
223 direction
|= DST_Y_TOP_TO_BOTTOM
;
229 direction
|= DST_X_LEFT_TO_RIGHT
;
231 if (info
->var
.bits_per_pixel
== 24) {
232 rotation
= rotation24bpp(dx
, direction
);
235 wait_for_fifo(4, par
);
236 aty_st_le32(DP_SRC
, FRGD_SRC_BLIT
, par
);
237 aty_st_le32(SRC_Y_X
, (sx
<< 16) | sy
, par
);
238 aty_st_le32(SRC_HEIGHT1_WIDTH1
, (width
<< 16) | area
->height
, par
);
239 aty_st_le32(DST_CNTL
, direction
| rotation
, par
);
240 draw_rect(dx
, dy
, width
, area
->height
, par
);
243 void atyfb_fillrect(struct fb_info
*info
, const struct fb_fillrect
*rect
)
245 struct atyfb_par
*par
= (struct atyfb_par
*) info
->par
;
246 u32 color
, dx
= rect
->dx
, width
= rect
->width
, rotation
= 0;
250 if (!rect
->width
|| !rect
->height
)
252 if (!par
->accel_flags
) {
253 cfb_fillrect(info
, rect
);
257 if (info
->fix
.visual
== FB_VISUAL_TRUECOLOR
||
258 info
->fix
.visual
== FB_VISUAL_DIRECTCOLOR
)
259 color
= ((u32
*)(info
->pseudo_palette
))[rect
->color
];
263 if (info
->var
.bits_per_pixel
== 24) {
264 /* In 24 bpp, the engine is in 8 bpp - this requires that all */
265 /* horizontal coordinates and widths must be adjusted */
268 rotation
= rotation24bpp(dx
, DST_X_LEFT_TO_RIGHT
);
271 wait_for_fifo(3, par
);
272 aty_st_le32(DP_FRGD_CLR
, color
, par
);
274 BKGD_SRC_BKGD_CLR
| FRGD_SRC_FRGD_CLR
| MONO_SRC_ONE
,
276 aty_st_le32(DST_CNTL
,
277 DST_LAST_PEL
| DST_Y_TOP_TO_BOTTOM
|
278 DST_X_LEFT_TO_RIGHT
| rotation
, par
);
279 draw_rect(dx
, rect
->dy
, width
, rect
->height
, par
);
282 void atyfb_imageblit(struct fb_info
*info
, const struct fb_image
*image
)
284 struct atyfb_par
*par
= (struct atyfb_par
*) info
->par
;
285 u32 src_bytes
, dx
= image
->dx
, dy
= image
->dy
, width
= image
->width
;
286 u32 pix_width_save
, pix_width
, host_cntl
, rotation
= 0, src
, mix
;
290 if (!image
->width
|| !image
->height
)
292 if (!par
->accel_flags
||
293 (image
->depth
!= 1 && info
->var
.bits_per_pixel
!= image
->depth
)) {
294 cfb_imageblit(info
, image
);
298 pix_width
= pix_width_save
= aty_ld_le32(DP_PIX_WIDTH
, par
);
299 host_cntl
= aty_ld_le32(HOST_CNTL
, par
) | HOST_BYTE_ALIGN
;
301 switch (image
->depth
) {
303 pix_width
&= ~(BYTE_ORDER_MASK
| HOST_MASK
);
304 pix_width
|= (BYTE_ORDER_MSB_TO_LSB
| HOST_1BPP
);
307 pix_width
&= ~(BYTE_ORDER_MASK
| HOST_MASK
);
308 pix_width
|= (BYTE_ORDER_MSB_TO_LSB
| HOST_4BPP
);
311 pix_width
&= ~HOST_MASK
;
312 pix_width
|= HOST_8BPP
;
315 pix_width
&= ~HOST_MASK
;
316 pix_width
|= HOST_15BPP
;
319 pix_width
&= ~HOST_MASK
;
320 pix_width
|= HOST_16BPP
;
323 pix_width
&= ~HOST_MASK
;
324 pix_width
|= HOST_24BPP
;
327 pix_width
&= ~HOST_MASK
;
328 pix_width
|= HOST_32BPP
;
332 if (info
->var
.bits_per_pixel
== 24) {
333 /* In 24 bpp, the engine is in 8 bpp - this requires that all */
334 /* horizontal coordinates and widths must be adjusted */
338 rotation
= rotation24bpp(dx
, DST_X_LEFT_TO_RIGHT
);
340 pix_width
&= ~DST_MASK
;
341 pix_width
|= DST_8BPP
;
344 * since Rage 3D IIc we have DP_HOST_TRIPLE_EN bit
345 * this hwaccelerated triple has an issue with not aligned data
347 if (M64_HAS(HW_TRIPLE
) && image
->width
% 8 == 0)
348 pix_width
|= DP_HOST_TRIPLE_EN
;
351 if (image
->depth
== 1) {
353 if (info
->fix
.visual
== FB_VISUAL_TRUECOLOR
||
354 info
->fix
.visual
== FB_VISUAL_DIRECTCOLOR
) {
355 fg
= ((u32
*)(info
->pseudo_palette
))[image
->fg_color
];
356 bg
= ((u32
*)(info
->pseudo_palette
))[image
->bg_color
];
358 fg
= image
->fg_color
;
359 bg
= image
->bg_color
;
362 wait_for_fifo(2, par
);
363 aty_st_le32(DP_BKGD_CLR
, bg
, par
);
364 aty_st_le32(DP_FRGD_CLR
, fg
, par
);
365 src
= MONO_SRC_HOST
| FRGD_SRC_FRGD_CLR
| BKGD_SRC_BKGD_CLR
;
366 mix
= FRGD_MIX_S
| BKGD_MIX_S
;
368 src
= MONO_SRC_ONE
| FRGD_SRC_HOST
;
369 mix
= FRGD_MIX_D_XOR_S
| BKGD_MIX_D
;
372 wait_for_fifo(6, par
);
373 aty_st_le32(DP_WRITE_MASK
, 0xFFFFFFFF, par
);
374 aty_st_le32(DP_PIX_WIDTH
, pix_width
, par
);
375 aty_st_le32(DP_MIX
, mix
, par
);
376 aty_st_le32(DP_SRC
, src
, par
);
377 aty_st_le32(HOST_CNTL
, host_cntl
, par
);
378 aty_st_le32(DST_CNTL
, DST_Y_TOP_TO_BOTTOM
| DST_X_LEFT_TO_RIGHT
| rotation
, par
);
380 draw_rect(dx
, dy
, width
, image
->height
, par
);
381 src_bytes
= (((image
->width
* image
->depth
) + 7) / 8) * image
->height
;
383 /* manual triple each pixel */
384 if (info
->var
.bits_per_pixel
== 24 && !(pix_width
& DP_HOST_TRIPLE_EN
)) {
385 int inbit
, outbit
, mult24
, byte_id_in_dword
, width
;
386 u8
*pbitmapin
= (u8
*)image
->data
, *pbitmapout
;
389 for (width
= image
->width
, inbit
= 7, mult24
= 0; src_bytes
; ) {
390 for (hostdword
= 0, pbitmapout
= (u8
*)&hostdword
, byte_id_in_dword
= 0;
391 byte_id_in_dword
< 4 && src_bytes
;
392 byte_id_in_dword
++, pbitmapout
++) {
393 for (outbit
= 7; outbit
>= 0; outbit
--) {
394 *pbitmapout
|= (((*pbitmapin
>> inbit
) & 1) << outbit
);
404 if (inbit
< 0 || width
== 0) {
410 width
= image
->width
;
416 wait_for_fifo(1, par
);
417 aty_st_le32(HOST_DATA0
, hostdword
, par
);
420 u32
*pbitmap
, dwords
= (src_bytes
+ 3) / 4;
421 for (pbitmap
= (u32
*)(image
->data
); dwords
; dwords
--, pbitmap
++) {
422 wait_for_fifo(1, par
);
423 aty_st_le32(HOST_DATA0
, get_unaligned_le32(pbitmap
), par
);
427 /* restore pix_width */
428 wait_for_fifo(1, par
);
429 aty_st_le32(DP_PIX_WIDTH
, pix_width_save
, par
);