vfs: check userland buffers before reading them.
[haiku.git] / src / add-ons / accelerants / nvidia / engine / nv_bes.c
blob8703bf2db3ad4737baa5ae0ac59ed4e48b39012c
/* Nvidia TNT and GeForce Back End Scaler (BES, hardware video overlay) functions */
/* Written by Rudolf Cornelissen 05/2002-5/2009 */

/* identification bit of this source module
 * (NOTE(review): presumably tested by the LOG() macro against the user's
 * logmask -- confirm in nv_std.h) */
#define MODULE_BIT 0x00000200
6 #include "nv_std.h"
8 typedef struct move_overlay_info move_overlay_info;
10 struct move_overlay_info
12 uint32 hcoordv; /* left and right edges of video output window */
13 uint32 vcoordv; /* top and bottom edges of video output window */
14 uint32 hsrcstv; /* horizontal source start in source buffer (clipping) */
15 uint32 v1srcstv; /* vertical source start in source buffer (clipping) */
16 uintptr_t a1orgv; /* alternate source clipping via startadress of source buffer */
19 static void nv_bes_calc_move_overlay(move_overlay_info *moi);
20 static void nv_bes_program_move_overlay(move_overlay_info moi);
22 /* move the overlay output window in virtualscreens */
23 /* Note:
24 * si->dm.h_display_start and si->dm.v_display_start determine where the new
25 * output window is located! */
26 void nv_bes_move_overlay()
28 move_overlay_info moi;
30 /* abort if overlay is not active */
31 if (!si->overlay.active) return;
33 nv_bes_calc_move_overlay(&moi);
34 nv_bes_program_move_overlay(moi);
37 static void nv_bes_calc_move_overlay(move_overlay_info *moi)
39 /* misc used variables */
40 uint16 temp1, temp2;
41 /* visible screen window in virtual workspaces */
42 uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
44 /* do 'overlay follow head' in dualhead modes on dualhead cards */
45 if (si->ps.secondary_head)
47 switch (si->dm.flags & DUALHEAD_BITS)
49 case DUALHEAD_ON:
50 case DUALHEAD_SWITCH:
51 if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
52 (si->dm.h_display_start + si->dm.timing.h_display))
53 nv_bes_to_crtc(si->crtc_switch_mode);
54 else
55 nv_bes_to_crtc(!si->crtc_switch_mode);
56 break;
57 default:
58 nv_bes_to_crtc(si->crtc_switch_mode);
59 break;
63 /* the BES does not respect virtual_workspaces, but adheres to CRTC
64 * constraints only */
65 crtc_hstart = si->dm.h_display_start;
66 /* make dualhead stretch and switch mode work while we're at it.. */
67 if (si->overlay.crtc)
69 crtc_hstart += si->dm.timing.h_display;
72 /* horizontal end is the first position beyond the displayed range on the CRTC */
73 crtc_hend = crtc_hstart + si->dm.timing.h_display;
74 crtc_vstart = si->dm.v_display_start;
75 /* vertical end is the first position beyond the displayed range on the CRTC */
76 crtc_vend = crtc_vstart + si->dm.timing.v_display;
79 /****************************************
80 *** setup all edges of output window ***
81 ****************************************/
83 /* setup left and right edges of output window */
84 moi->hcoordv = 0;
85 /* left edge coordinate of output window, must be inside desktop */
86 /* clipping on the left side */
87 if (si->overlay.ow.h_start < crtc_hstart)
89 temp1 = 0;
91 else
93 /* clipping on the right side */
94 if (si->overlay.ow.h_start >= (crtc_hend - 1))
96 /* width < 2 is not allowed */
97 temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
99 else
100 /* no clipping here */
102 temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
105 moi->hcoordv |= temp1 << 16;
106 /* right edge coordinate of output window, must be inside desktop */
107 /* width < 2 is not allowed */
108 if (si->overlay.ow.width < 2)
110 temp2 = (temp1 + 1) & 0x7ff;
112 else
114 /* clipping on the right side */
115 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
117 temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
119 else
121 /* clipping on the left side */
122 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
124 /* width < 2 is not allowed */
125 temp2 = 1;
127 else
128 /* no clipping here */
130 temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
134 moi->hcoordv |= temp2 << 0;
135 LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
137 /* setup top and bottom edges of output window */
138 moi->vcoordv = 0;
139 /* top edge coordinate of output window, must be inside desktop */
140 /* clipping on the top side */
141 if (si->overlay.ow.v_start < crtc_vstart)
143 temp1 = 0;
145 else
147 /* clipping on the bottom side */
148 if (si->overlay.ow.v_start >= (crtc_vend - 1))
150 /* height < 2 is not allowed */
151 temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
153 else
154 /* no clipping here */
156 temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
159 moi->vcoordv |= temp1 << 16;
160 /* bottom edge coordinate of output window, must be inside desktop */
161 /* height < 2 is not allowed */
162 if (si->overlay.ow.height < 2)
164 temp2 = (temp1 + 1) & 0x7ff;
166 else
168 /* clipping on the bottom side */
169 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
171 temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
173 else
175 /* clipping on the top side */
176 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
178 /* height < 2 is not allowed */
179 temp2 = 1;
181 else
182 /* no clipping here */
184 temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
188 moi->vcoordv |= temp2 << 0;
189 LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
192 /*********************************
193 *** setup horizontal clipping ***
194 *********************************/
196 /* Setup horizontal source start: first (sub)pixel contributing to output picture */
197 /* Note:
198 * The method is to calculate, based on 1:1 scaling, based on the output window.
199 * After this is done, include the scaling factor so you get a value based on the input bitmap.
200 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
201 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
202 /* Note also:
203 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
204 moi->hsrcstv = 0;
205 /* check for destination horizontal clipping at left side */
206 if (si->overlay.ow.h_start < crtc_hstart)
208 /* check if entire destination picture is clipping left:
209 * (2 pixels will be clamped onscreen at least) */
210 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
212 /* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
213 moi->hsrcstv += (si->overlay.ow.width - 2);
215 else
217 /* increase 'first contributing pixel' with actual number of dest. clipping pixels */
218 moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
220 LOG(4,("Overlay: clipping left...\n"));
222 /* The calculated value is based on scaling = 1x. So we now compensate for scaling.
223 * Note that this also already takes care of aligning the value to the BES register! */
224 moi->hsrcstv *= si->overlay.h_ifactor;
226 /* take zoom into account */
227 moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
228 /* AND below required by hardware (> 1024 support confirmed on all cards) */
229 moi->hsrcstv &= 0x07fffffc;
230 LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
233 /*******************************
234 *** setup vertical clipping ***
235 *******************************/
237 /* calculate inputbitmap origin adress */
238 moi->a1orgv = (uintptr_t)((vuint32 *)si->overlay.ob.buffer);
239 moi->a1orgv -= (uintptr_t)((vuint32 *)si->framebuffer);
240 LOG(4, ("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
242 /* Setup vertical source start: first (sub)pixel contributing to output picture. */
243 /* Note:
244 * The method is to calculate, based on 1:1 scaling, based on the output window.
245 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
246 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
247 /* Note also:
248 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
250 moi->v1srcstv = 0;
251 /* check for destination vertical clipping at top side */
252 if (si->overlay.ow.v_start < crtc_vstart)
254 /* check if entire destination picture is clipping at top:
255 * (2 pixels will be clamped onscreen at least) */
256 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
258 /* increase 'number of clipping pixels' with 'fixed value':
259 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
260 moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
261 /* on pre-NV10 we need to do clipping in the source
262 * bitmap because no seperate clipping registers exist... */
263 if (si->ps.card_arch < NV10A)
264 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
266 else
268 /* increase 'first contributing pixel' with:
269 * number of destination picture clipping pixels * inverse scaling factor */
270 moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
271 /* on pre-NV10 we need to do clipping in the source
272 * bitmap because no seperate clipping registers exist... */
273 if (si->ps.card_arch < NV10A)
274 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
276 LOG(4,("Overlay: clipping at top...\n"));
278 /* take zoom into account */
279 moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
280 if (si->ps.card_arch < NV10A)
282 moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
283 LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
285 LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
287 /* AND below is probably required by hardware. */
288 /* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
289 moi->a1orgv &= 0xfffffff0;
292 static void nv_bes_program_move_overlay(move_overlay_info moi)
294 /*************************************
295 *** sync to BES (Back End Scaler) ***
296 *************************************/
298 /* Done in card hardware:
299 * double buffered registers + trigger if programming complete feature. */
302 /**************************************
303 *** actually program the registers ***
304 **************************************/
306 if (si->ps.card_arch < NV10A)
308 /* unknown, but needed (otherwise high-res distortions and only half the frames */
309 BESW(NV04_OE_STATE, 0x00000000);
310 /* select buffer 0 as active (b16) */
311 BESW(NV04_SU_STATE, 0x00000000);
312 /* unknown (no effect?) */
313 BESW(NV04_RM_STATE, 0x00000000);
314 /* setup clipped(!) buffer startadress in RAM */
315 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
316 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
317 /* (program both buffers to prevent sync distortions) */
318 /* first include 'pixel precise' left clipping... (top clipping was already included) */
319 moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
320 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
321 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
322 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
323 /* setup output window position */
324 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
325 /* setup output window size */
326 BESW(NV04_DSTSIZE, (
327 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
328 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
330 /* select buffer 1 as active (b16) */
331 BESW(NV04_SU_STATE, 0x00010000);
333 else
335 /* >= NV10A */
337 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
338 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
339 /* setup output window position */
340 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
341 /* setup output window size */
342 BESW(NV10_0DSTSIZE, (
343 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
344 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
346 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
347 /* This also triggers activation of programmed values (double buffered registers feature) */
348 BESW(NV10_BUFSEL, 0x00000001);
352 status_t nv_bes_to_crtc(bool crtc)
354 if (si->ps.secondary_head)
356 if (crtc)
358 LOG(4,("Overlay: switching overlay to CRTC2\n"));
359 /* switch overlay engine to CRTC2 */
360 NV_REG32(NV32_FUNCSEL) &= ~0x00001000;
361 NV_REG32(NV32_2FUNCSEL) |= 0x00001000;
362 si->overlay.crtc = !si->crtc_switch_mode;
364 else
366 LOG(4,("Overlay: switching overlay to CRTC1\n"));
367 /* switch overlay engine to CRTC1 */
368 NV_REG32(NV32_2FUNCSEL) &= ~0x00001000;
369 NV_REG32(NV32_FUNCSEL) |= 0x00001000;
370 si->overlay.crtc = si->crtc_switch_mode;
372 return B_OK;
374 else
376 return B_ERROR;
380 status_t nv_bes_init()
382 if (si->ps.card_arch < NV10A)
384 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
385 BESW(NV04_INTE, 0x00000000);
387 /* setup saturation to be 'neutral' */
388 BESW(NV04_SAT, 0x00000000);
389 /* setup RGB brightness to be 'neutral' */
390 BESW(NV04_RED_AMP, 0x00000069);
391 BESW(NV04_GRN_AMP, 0x0000003e);
392 BESW(NV04_BLU_AMP, 0x00000089);
394 /* setup fifo for fetching data */
395 BESW(NV04_FIFOBURL, 0x00000003);
396 BESW(NV04_FIFOTHRS, 0x00000038);
398 /* unknown, but needed (registers only have b0 implemented) */
399 /* (program both buffers to prevent sync distortions) */
400 BESW(NV04_0OFFSET, 0x00000000);
401 BESW(NV04_1OFFSET, 0x00000000);
403 else
405 /* >= NV10A */
407 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
408 BESW(NV10_INTE, 0x00000000);
409 /* shut off GeForce4MX MPEG2 decoder */
410 BESW(DEC_GENCTRL, 0x00000000);
411 /* setup BES memory-range mask */
412 BESW(NV10_0MEMMASK, (si->ps.memory_size - 1));
413 /* unknown, but needed */
414 BESW(NV10_0OFFSET, 0x00000000);
416 /* setup brightness, contrast and saturation to be 'neutral' */
417 BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
418 BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
421 /* make sure the engine is disabled. */
422 nv_release_bes();
424 return B_OK;
427 status_t nv_configure_bes
428 (const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
430 /* yuy2 (4:2:2) colorspace calculations */
432 /* Note:
433 * in BeOS R5.0.3 and DANO:
434 * 'ow->offset_xxx' is always 0, so not used;
435 * 'ow->width' and 'ow->height' are the output window size: does not change
436 * if window is clipping;
437 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
438 * window. These values can be negative: this means the window is clipping
439 * at the left or the top of the display, respectively. */
441 /* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
442 * displayed on screen. This is used for the 'hardware zoom' function. */
444 /* output window position and clipping info for source buffer */
445 move_overlay_info moi;
446 /* calculated BES register values */
447 uint32 hiscalv, viscalv;
448 /* interval representation, used for scaling calculations */
449 uint16 intrep;
450 /* inverse scaling factor, used for source positioning */
451 uint32 ifactor;
452 /* copy of overlay view which has checked valid values */
453 overlay_view my_ov;
456 /**************************************************************************************
457 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
458 **************************************************************************************/
459 my_ov = *ov;
460 /* check for valid 'coordinates' */
461 if (my_ov.width == 0) my_ov.width++;
462 if (my_ov.height == 0) my_ov.height++;
463 if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
464 my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
465 if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
466 my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
467 if (my_ov.v_start > (ob->height - 1))
468 my_ov.v_start = (ob->height - 1);
469 if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
470 my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
472 LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
473 my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
475 /* save for nv_bes_calc_move_overlay() */
476 si->overlay.ow = *ow;
477 si->overlay.ob = *ob;
478 si->overlay.my_ov = my_ov;
481 /********************************
482 *** setup horizontal scaling ***
483 ********************************/
484 LOG(4,("Overlay: total input picture width = %d, height = %d\n",
485 (ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
486 LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
488 /* determine interval representation value, taking zoom into account */
489 if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
491 /* horizontal filtering is ON */
492 if ((my_ov.width == ow->width) | (ow->width < 2))
494 /* no horizontal scaling used, OR destination width < 2 */
495 intrep = 0;
497 else
499 intrep = 1;
502 else
504 /* horizontal filtering is OFF */
505 if ((ow->width < my_ov.width) & (ow->width >= 2))
507 /* horizontal downscaling used AND destination width >= 2 */
508 intrep = 1;
510 else
512 intrep = 0;
515 LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
517 /* calculate inverse horizontal scaling factor, taking zoom into account */
518 /* standard scaling formula: */
519 ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
521 /* correct factor to prevent most-right visible 'line' from distorting */
522 ifactor -= (1 << 2);
523 hiscalv = ifactor;
524 /* save for nv_bes_calc_move_overlay() */
525 si->overlay.h_ifactor = ifactor;
526 LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
528 /* check scaling factor (and modify if needed) to be within scaling limits */
529 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
530 if (hiscalv < 0x00002000)
532 /* (non-inverse) factor too large, set factor to max. valid value */
533 hiscalv = 0x00002000;
534 LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
536 switch (si->ps.card_arch)
538 case NV04A:
539 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
540 * (16bit register with 0.11 format value) */
541 if (hiscalv > 0x0000ffff)
543 /* (non-inverse) factor too small, set factor to min. valid value */
544 hiscalv = 0x0000ffff;
545 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
547 break;
548 case NV30A:
549 case NV40A:
550 /* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
551 if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
553 /* (non-inverse) factor too small, set factor to min. valid value */
554 hiscalv = (2 << 16);
555 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
557 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
558 * So let it fall through... */
559 if (si->ps.card_type != NV31) break;
560 default:
561 /* the rest has a downscaling limit of 0.125 */
562 if (hiscalv > (8 << 16))
564 /* (non-inverse) factor too small, set factor to min. valid value */
565 hiscalv = (8 << 16);
566 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
568 break;
570 /* AND below is required by hardware */
571 hiscalv &= 0x001ffffc;
574 /******************************
575 *** setup vertical scaling ***
576 ******************************/
578 /* determine interval representation value, taking zoom into account */
579 if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
581 /* vertical filtering is ON */
582 if ((my_ov.height == ow->height) | (ow->height < 2))
584 /* no vertical scaling used, OR destination height < 2 */
585 intrep = 0;
587 else
589 intrep = 1;
592 else
594 /* vertical filtering is OFF */
595 if ((ow->height < my_ov.height) & (ow->height >= 2))
597 /* vertical downscaling used AND destination height >= 2 */
598 intrep = 1;
600 else
602 intrep = 0;
605 LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
607 /* calculate inverse vertical scaling factor, taking zoom into account */
608 /* standard scaling formula: */
609 ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
611 /* correct factor to prevent lowest visible line from distorting */
612 ifactor -= (1 << 2);
613 LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
615 /* preserve ifactor for source positioning calculations later on */
616 viscalv = ifactor;
617 /* save for nv_bes_calc_move_overlay() */
618 si->overlay.v_ifactor = ifactor;
620 /* check scaling factor (and modify if needed) to be within scaling limits */
621 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
622 if (viscalv < 0x00002000)
624 /* (non-inverse) factor too large, set factor to max. valid value */
625 viscalv = 0x00002000;
626 LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
628 switch (si->ps.card_arch)
630 case NV04A:
631 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
632 * (16bit register with 0.11 format value) */
633 if (viscalv > 0x0000ffff)
635 /* (non-inverse) factor too small, set factor to min. valid value */
636 viscalv = 0x0000ffff;
637 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
639 break;
640 case NV30A:
641 case NV40A:
642 /* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
643 if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
645 /* (non-inverse) factor too small, set factor to min. valid value */
646 viscalv = (2 << 16);
647 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
649 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
650 * So let it fall through... */
651 if (si->ps.card_type != NV31) break;
652 default:
653 /* the rest has a downscaling limit of 0.125 */
654 if (viscalv > (8 << 16))
656 /* (non-inverse) factor too small, set factor to min. valid value */
657 viscalv = (8 << 16);
658 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
660 break;
662 /* AND below is required by hardware */
663 viscalv &= 0x001ffffc;
666 /********************************************************************************
667 *** setup all edges of output window, setup horizontal and vertical clipping ***
668 ********************************************************************************/
669 nv_bes_calc_move_overlay(&moi);
672 /*****************************
673 *** log color keying info ***
674 *****************************/
676 LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
677 ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
678 LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
679 ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
682 /*****************
683 *** log flags ***
684 *****************/
686 LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
687 /* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
690 /*************************************
691 *** sync to BES (Back End Scaler) ***
692 *************************************/
694 /* Done in card hardware:
695 * double buffered registers + trigger if programming complete feature. */
698 /**************************************
699 *** actually program the registers ***
700 **************************************/
702 if (si->ps.card_arch < NV10A)
704 /* unknown, but needed (otherwise high-res distortions and only half the frames */
705 BESW(NV04_OE_STATE, 0x00000000);
706 /* select buffer 0 as active (b16) */
707 BESW(NV04_SU_STATE, 0x00000000);
708 /* unknown (no effect?) */
709 BESW(NV04_RM_STATE, 0x00000000);
710 /* setup clipped(!) buffer startadress in RAM */
711 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
712 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
713 /* (program both buffers to prevent sync distortions) */
714 /* first include 'pixel precise' left clipping... (top clipping was already included) */
715 moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
716 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
717 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
718 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
719 /* setup buffer source pitch including slopspace (in bytes).
720 * Note:
721 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
722 /* (program both buffers to prevent sync distortions) */
723 BESW(NV04_0SRCPTCH, (ob->width * 2));
724 BESW(NV04_1SRCPTCH, (ob->width * 2));
725 /* setup output window position */
726 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
727 /* setup output window size */
728 BESW(NV04_DSTSIZE, (
729 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
730 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
732 /* setup horizontal and vertical scaling */
733 BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
734 /* enable vertical filtering (b0) */
735 BESW(NV04_CTRL_V, 0x00000001);
736 /* enable horizontal filtering (no effect?) */
737 BESW(NV04_CTRL_H, 0x00000111);
738 /* enable BES (b0), set colorkeying (b4), format yuy2 (b8: 0 = ccir) */
739 if (ow->flags & B_OVERLAY_COLOR_KEY)
740 BESW(NV04_GENCTRL, 0x00000111);
741 else
742 BESW(NV04_GENCTRL, 0x00000101);
743 /* select buffer 1 as active (b16) */
744 BESW(NV04_SU_STATE, 0x00010000);
746 /**************************
747 *** setup color keying ***
748 **************************/
750 /* setup colorkeying */
751 switch(si->dm.space)
753 case B_RGB15_LITTLE:
754 BESW(NV04_COLKEY, (
755 ((ow->blue.value & ow->blue.mask) << 0) |
756 ((ow->green.value & ow->green.mask) << 5) |
757 ((ow->red.value & ow->red.mask) << 10) |
758 ((ow->alpha.value & ow->alpha.mask) << 15)
760 break;
761 case B_RGB16_LITTLE:
762 BESW(NV04_COLKEY, (
763 ((ow->blue.value & ow->blue.mask) << 0) |
764 ((ow->green.value & ow->green.mask) << 5) |
765 ((ow->red.value & ow->red.mask) << 11)
766 /* this space has no alpha bits */
768 break;
769 case B_CMAP8:
770 case B_RGB32_LITTLE:
771 default:
772 BESW(NV04_COLKEY, (
773 ((ow->blue.value & ow->blue.mask) << 0) |
774 ((ow->green.value & ow->green.mask) << 8) |
775 ((ow->red.value & ow->red.mask) << 16) |
776 ((ow->alpha.value & ow->alpha.mask) << 24)
778 break;
781 else
783 /* >= NV10A */
785 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
786 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
787 /* setup buffersize */
788 //fixme if needed: width must be even officially...
789 BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
790 /* setup source pitch including slopspace (in bytes),
791 * b16: select YUY2 (0 = YV12), b20: set colorkeying, b24: no iturbt_709 (do iturbt_601) */
792 /* Note:
793 * source pitch granularity = 32 pixels on GeForce cards!! */
794 if (ow->flags & B_OVERLAY_COLOR_KEY)
795 BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
796 else
797 BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (0 << 20) | (0 << 24)));
798 /* setup output window position */
799 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
800 /* setup output window size */
801 BESW(NV10_0DSTSIZE, (
802 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
803 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
805 /* setup horizontal scaling */
806 BESW(NV10_0ISCALH, (hiscalv << 4));
807 /* setup vertical scaling */
808 BESW(NV10_0ISCALV, (viscalv << 4));
809 /* setup (unclipped!) buffer startadress in RAM */
810 BESW(NV10_0BUFADR, moi.a1orgv);
811 /* enable BES (b0 = 0) */
812 BESW(NV10_GENCTRL, 0x00000000);
813 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
814 /* This also triggers activation of programmed values (double buffered registers feature) */
815 BESW(NV10_BUFSEL, 0x00000001);
817 /**************************
818 *** setup color keying ***
819 **************************/
821 /* setup colorkeying */
822 switch(si->dm.space)
824 case B_RGB15_LITTLE:
825 BESW(NV10_COLKEY, (
826 ((ow->blue.value & ow->blue.mask) << 0) |
827 ((ow->green.value & ow->green.mask) << 5) |
828 ((ow->red.value & ow->red.mask) << 10) |
829 ((ow->alpha.value & ow->alpha.mask) << 15)
831 break;
832 case B_RGB16_LITTLE:
833 BESW(NV10_COLKEY, (
834 ((ow->blue.value & ow->blue.mask) << 0) |
835 ((ow->green.value & ow->green.mask) << 5) |
836 ((ow->red.value & ow->red.mask) << 11)
837 /* this space has no alpha bits */
839 break;
840 case B_CMAP8:
841 case B_RGB32_LITTLE:
842 default:
843 BESW(NV10_COLKEY, (
844 ((ow->blue.value & ow->blue.mask) << 0) |
845 ((ow->green.value & ow->green.mask) << 8) |
846 ((ow->red.value & ow->red.mask) << 16) |
847 ((ow->alpha.value & ow->alpha.mask) << 24)
849 break;
853 /* note that overlay is in use (for nv_bes_move_overlay()) */
854 si->overlay.active = true;
856 return B_OK;
859 status_t nv_release_bes()
861 if (si->ps.card_arch < NV10A)
863 /* setup BES control: disable scaler (b0 = 0) */
864 BESW(NV04_GENCTRL, 0x00000000);
866 else
868 /* setup BES control: disable scaler (b0 = 1) */
869 BESW(NV10_GENCTRL, 0x00000001);
872 /* note that overlay is not in use (for nv_bes_move_overlay()) */
873 si->overlay.active = false;
875 return B_OK;