vfs: check userland buffers before reading them.
[haiku.git] / src / add-ons / accelerants / skeleton / engine / bes.c
blob22a0a5e5f5672d21d5d6d19a159886744e04acb2
1 /* Nvidia TNT and GeForce Back End Scaler functions */
2 /* Written by Rudolf Cornelissen 05/2002-9/2004 */
4 #define MODULE_BIT 0x00000200
6 #include "std.h"
8 typedef struct move_overlay_info move_overlay_info;
10 struct move_overlay_info
12 uint32 hcoordv; /* left and right edges of video output window */
13 uint32 vcoordv; /* top and bottom edges of video output window */
14 uint32 hsrcstv; /* horizontal source start in source buffer (clipping) */
15 uint32 v1srcstv; /* vertical source start in source buffer (clipping) */
16 uint32 a1orgv; /* alternate source clipping via startadress of source buffer */
19 static void eng_bes_calc_move_overlay(move_overlay_info *moi);
20 static void eng_bes_program_move_overlay(move_overlay_info moi);
22 /* move the overlay output window in virtualscreens */
23 /* Note:
24 * si->dm.h_display_start and si->dm.v_display_start determine where the new
25 * output window is located! */
26 void eng_bes_move_overlay()
28 move_overlay_info moi;
30 /* abort if overlay is not active */
31 if (!si->overlay.active) return;
33 eng_bes_calc_move_overlay(&moi);
34 eng_bes_program_move_overlay(moi);
37 static void eng_bes_calc_move_overlay(move_overlay_info *moi)
39 /* misc used variables */
40 uint16 temp1, temp2;
41 /* visible screen window in virtual workspaces */
42 uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
44 /* do 'overlay follow head' in dualhead modes on dualhead cards */
45 if (si->ps.secondary_head)
47 switch (si->dm.flags & DUALHEAD_BITS)
49 case DUALHEAD_ON:
50 case DUALHEAD_SWITCH:
51 if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
52 (si->dm.h_display_start + si->dm.timing.h_display))
53 eng_bes_to_crtc(si->crtc_switch_mode);
54 else
55 eng_bes_to_crtc(!si->crtc_switch_mode);
56 break;
57 default:
58 eng_bes_to_crtc(si->crtc_switch_mode);
59 break;
63 /* the BES does not respect virtual_workspaces, but adheres to CRTC
64 * constraints only */
65 crtc_hstart = si->dm.h_display_start;
66 /* make dualhead stretch and switch mode work while we're at it.. */
67 if (si->overlay.crtc)
69 crtc_hstart += si->dm.timing.h_display;
72 /* horizontal end is the first position beyond the displayed range on the CRTC */
73 crtc_hend = crtc_hstart + si->dm.timing.h_display;
74 crtc_vstart = si->dm.v_display_start;
75 /* vertical end is the first position beyond the displayed range on the CRTC */
76 crtc_vend = crtc_vstart + si->dm.timing.v_display;
79 /****************************************
80 *** setup all edges of output window ***
81 ****************************************/
83 /* setup left and right edges of output window */
84 moi->hcoordv = 0;
85 /* left edge coordinate of output window, must be inside desktop */
86 /* clipping on the left side */
87 if (si->overlay.ow.h_start < crtc_hstart)
89 temp1 = 0;
91 else
93 /* clipping on the right side */
94 if (si->overlay.ow.h_start >= (crtc_hend - 1))
96 /* width < 2 is not allowed */
97 temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
99 else
100 /* no clipping here */
102 temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
105 moi->hcoordv |= temp1 << 16;
106 /* right edge coordinate of output window, must be inside desktop */
107 /* width < 2 is not allowed */
108 if (si->overlay.ow.width < 2)
110 temp2 = (temp1 + 1) & 0x7ff;
112 else
114 /* clipping on the right side */
115 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
117 temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
119 else
121 /* clipping on the left side */
122 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
124 /* width < 2 is not allowed */
125 temp2 = 1;
127 else
128 /* no clipping here */
130 temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
134 moi->hcoordv |= temp2 << 0;
135 LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
137 /* setup top and bottom edges of output window */
138 moi->vcoordv = 0;
139 /* top edge coordinate of output window, must be inside desktop */
140 /* clipping on the top side */
141 if (si->overlay.ow.v_start < crtc_vstart)
143 temp1 = 0;
145 else
147 /* clipping on the bottom side */
148 if (si->overlay.ow.v_start >= (crtc_vend - 1))
150 /* height < 2 is not allowed */
151 temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
153 else
154 /* no clipping here */
156 temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
159 moi->vcoordv |= temp1 << 16;
160 /* bottom edge coordinate of output window, must be inside desktop */
161 /* height < 2 is not allowed */
162 if (si->overlay.ow.height < 2)
164 temp2 = (temp1 + 1) & 0x7ff;
166 else
168 /* clipping on the bottom side */
169 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
171 temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
173 else
175 /* clipping on the top side */
176 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
178 /* height < 2 is not allowed */
179 temp2 = 1;
181 else
182 /* no clipping here */
184 temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
188 moi->vcoordv |= temp2 << 0;
189 LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
192 /*********************************
193 *** setup horizontal clipping ***
194 *********************************/
196 /* Setup horizontal source start: first (sub)pixel contributing to output picture */
197 /* Note:
198 * The method is to calculate, based on 1:1 scaling, based on the output window.
199 * After this is done, include the scaling factor so you get a value based on the input bitmap.
200 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
201 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
202 /* Note also:
203 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
204 moi->hsrcstv = 0;
205 /* check for destination horizontal clipping at left side */
206 if (si->overlay.ow.h_start < crtc_hstart)
208 /* check if entire destination picture is clipping left:
209 * (2 pixels will be clamped onscreen at least) */
210 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
212 /* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
213 moi->hsrcstv += (si->overlay.ow.width - 2);
215 else
217 /* increase 'first contributing pixel' with actual number of dest. clipping pixels */
218 moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
220 LOG(4,("Overlay: clipping left...\n"));
222 /* The calculated value is based on scaling = 1x. So we now compensate for scaling.
223 * Note that this also already takes care of aligning the value to the BES register! */
224 moi->hsrcstv *= si->overlay.h_ifactor;
226 /* take zoom into account */
227 moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
228 /* AND below required by hardware */
229 moi->hsrcstv &= 0x03fffffc;
230 LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
233 /*******************************
234 *** setup vertical clipping ***
235 *******************************/
237 /* calculate inputbitmap origin adress */
238 moi->a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer);
239 moi->a1orgv -= (uint32)((vuint32 *)si->framebuffer);
240 LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
242 /* Setup vertical source start: first (sub)pixel contributing to output picture. */
243 /* Note:
244 * The method is to calculate, based on 1:1 scaling, based on the output window.
245 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
246 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
247 /* Note also:
248 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
250 moi->v1srcstv = 0;
251 /* check for destination vertical clipping at top side */
252 if (si->overlay.ow.v_start < crtc_vstart)
254 /* check if entire destination picture is clipping at top:
255 * (2 pixels will be clamped onscreen at least) */
256 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
258 /* increase 'number of clipping pixels' with 'fixed value':
259 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
260 moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
261 /* on pre-NV10 we need to do clipping in the source
262 * bitmap because no seperate clipping registers exist... */
263 if (si->ps.card_arch < NV10A)
264 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
266 else
268 /* increase 'first contributing pixel' with:
269 * number of destination picture clipping pixels * inverse scaling factor */
270 moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
271 /* on pre-NV10 we need to do clipping in the source
272 * bitmap because no seperate clipping registers exist... */
273 if (si->ps.card_arch < NV10A)
274 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
276 LOG(4,("Overlay: clipping at top...\n"));
278 /* take zoom into account */
279 moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
280 if (si->ps.card_arch < NV10A)
282 moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
283 LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
285 LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
287 /* AND below is probably required by hardware. */
288 /* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
289 moi->a1orgv &= 0xfffffff0;
292 static void eng_bes_program_move_overlay(move_overlay_info moi)
294 /*************************************
295 *** sync to BES (Back End Scaler) ***
296 *************************************/
298 /* Done in card hardware:
299 * double buffered registers + trigger if programming complete feature. */
302 /**************************************
303 *** actually program the registers ***
304 **************************************/
306 if (si->ps.card_arch < NV10A)
308 /* unknown, but needed (otherwise high-res distortions and only half the frames */
309 BESW(NV04_OE_STATE, 0x00000000);
310 /* select buffer 0 as active (b16) */
311 BESW(NV04_SU_STATE, 0x00000000);
312 /* unknown (no effect?) */
313 BESW(NV04_RM_STATE, 0x00000000);
314 /* setup clipped(!) buffer startadress in RAM */
315 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
316 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
317 /* (program both buffers to prevent sync distortions) */
318 /* first include 'pixel precise' left clipping... (top clipping was already included) */
319 moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
320 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
321 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
322 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
323 /* setup output window position */
324 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
325 /* setup output window size */
326 BESW(NV04_DSTSIZE, (
327 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
328 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
330 /* select buffer 1 as active (b16) */
331 BESW(NV04_SU_STATE, 0x00010000);
333 else
335 /* >= NV10A */
337 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
338 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
339 /* setup output window position */
340 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
341 /* setup output window size */
342 BESW(NV10_0DSTSIZE, (
343 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
344 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
346 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
347 /* This also triggers activation of programmed values (double buffered registers feature) */
348 BESW(NV10_BUFSEL, 0x00000001);
352 status_t eng_bes_to_crtc(bool crtc)
354 if (si->ps.secondary_head)
356 if (crtc)
358 LOG(4,("Overlay: switching overlay to CRTC2\n"));
359 /* switch overlay engine to CRTC2 */
360 ENG_RG32(RG32_FUNCSEL) &= ~0x00001000;
361 ENG_RG32(RG32_2FUNCSEL) |= 0x00001000;
362 si->overlay.crtc = !si->crtc_switch_mode;
364 else
366 LOG(4,("Overlay: switching overlay to CRTC1\n"));
367 /* switch overlay engine to CRTC1 */
368 ENG_RG32(RG32_2FUNCSEL) &= ~0x00001000;
369 ENG_RG32(RG32_FUNCSEL) |= 0x00001000;
370 si->overlay.crtc = si->crtc_switch_mode;
372 return B_OK;
374 else
376 return B_ERROR;
380 status_t eng_bes_init()
382 if (si->ps.card_arch < NV10A)
384 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
385 BESW(NV04_INTE, 0x00000000);
387 /* setup saturation to be 'neutral' */
388 BESW(NV04_SAT, 0x00000000);
389 /* setup RGB brightness to be 'neutral' */
390 BESW(NV04_RED_AMP, 0x00000069);
391 BESW(NV04_GRN_AMP, 0x0000003e);
392 BESW(NV04_BLU_AMP, 0x00000089);
394 /* setup fifo for fetching data */
395 BESW(NV04_FIFOBURL, 0x00000003);
396 BESW(NV04_FIFOTHRS, 0x00000038);
398 /* unknown, but needed (registers only have b0 implemented) */
399 /* (program both buffers to prevent sync distortions) */
400 BESW(NV04_0OFFSET, 0x00000000);
401 BESW(NV04_1OFFSET, 0x00000000);
403 else
405 /* >= NV10A */
407 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
408 BESW(NV10_INTE, 0x00000000);
409 /* shut off GeForce4MX MPEG2 decoder */
410 BESW(DEC_GENCTRL, 0x00000000);
411 /* setup BES memory-range mask */
412 BESW(NV10_0MEMMASK, (si->ps.memory_size - 1));
413 /* unknown, but needed */
414 BESW(NV10_0OFFSET, 0x00000000);
416 /* setup brightness, contrast and saturation to be 'neutral' */
417 BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
418 BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
421 return B_OK;
424 status_t eng_configure_bes
425 (const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
427 /* yuy2 (4:2:2) colorspace calculations */
429 /* Note:
430 * in BeOS R5.0.3 and DANO:
431 * 'ow->offset_xxx' is always 0, so not used;
432 * 'ow->width' and 'ow->height' are the output window size: does not change
433 * if window is clipping;
434 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
435 * window. These values can be negative: this means the window is clipping
436 * at the left or the top of the display, respectively. */
438 /* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
439 * displayed on screen. This is used for the 'hardware zoom' function. */
441 /* output window position and clipping info for source buffer */
442 move_overlay_info moi;
443 /* calculated BES register values */
444 uint32 hiscalv, viscalv;
445 /* interval representation, used for scaling calculations */
446 uint16 intrep;
447 /* inverse scaling factor, used for source positioning */
448 uint32 ifactor;
449 /* copy of overlay view which has checked valid values */
450 overlay_view my_ov;
453 /**************************************************************************************
454 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
455 **************************************************************************************/
456 my_ov = *ov;
457 /* check for valid 'coordinates' */
458 if (my_ov.width == 0) my_ov.width++;
459 if (my_ov.height == 0) my_ov.height++;
460 if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
461 my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
462 if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
463 my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
464 if (my_ov.v_start > (ob->height - 1))
465 my_ov.v_start = (ob->height - 1);
466 if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
467 my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
469 LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
470 my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
472 /* save for eng_bes_calc_move_overlay() */
473 si->overlay.ow = *ow;
474 si->overlay.ob = *ob;
475 si->overlay.my_ov = my_ov;
478 /********************************
479 *** setup horizontal scaling ***
480 ********************************/
481 LOG(4,("Overlay: total input picture width = %d, height = %d\n",
482 (ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
483 LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
485 /* determine interval representation value, taking zoom into account */
486 if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
488 /* horizontal filtering is ON */
489 if ((my_ov.width == ow->width) | (ow->width < 2))
491 /* no horizontal scaling used, OR destination width < 2 */
492 intrep = 0;
494 else
496 intrep = 1;
499 else
501 /* horizontal filtering is OFF */
502 if ((ow->width < my_ov.width) & (ow->width >= 2))
504 /* horizontal downscaling used AND destination width >= 2 */
505 intrep = 1;
507 else
509 intrep = 0;
512 LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
514 /* calculate inverse horizontal scaling factor, taking zoom into account */
515 /* standard scaling formula: */
516 ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
518 /* correct factor to prevent most-right visible 'line' from distorting */
519 ifactor -= (1 << 2);
520 hiscalv = ifactor;
521 /* save for eng_bes_calc_move_overlay() */
522 si->overlay.h_ifactor = ifactor;
523 LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
525 /* check scaling factor (and modify if needed) to be within scaling limits */
526 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
527 if (hiscalv < 0x00002000)
529 /* (non-inverse) factor too large, set factor to max. valid value */
530 hiscalv = 0x00002000;
531 LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
533 switch (si->ps.card_arch)
535 case NV04A:
536 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
537 * (16bit register with 0.11 format value) */
538 if (hiscalv > 0x0000ffff)
540 /* (non-inverse) factor too small, set factor to min. valid value */
541 hiscalv = 0x0000ffff;
542 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
544 break;
545 case NV30A:
546 case NV40A:
547 /* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
548 if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
550 /* (non-inverse) factor too small, set factor to min. valid value */
551 hiscalv = (2 << 16);
552 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
554 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
555 * So let it fall through... */
556 if (si->ps.card_type != NV31) break;
557 default:
558 /* the rest has a downscaling limit of 0.125 */
559 if (hiscalv > (8 << 16))
561 /* (non-inverse) factor too small, set factor to min. valid value */
562 hiscalv = (8 << 16);
563 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
565 break;
567 /* AND below is required by hardware */
568 hiscalv &= 0x001ffffc;
571 /******************************
572 *** setup vertical scaling ***
573 ******************************/
575 /* determine interval representation value, taking zoom into account */
576 if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
578 /* vertical filtering is ON */
579 if ((my_ov.height == ow->height) | (ow->height < 2))
581 /* no vertical scaling used, OR destination height < 2 */
582 intrep = 0;
584 else
586 intrep = 1;
589 else
591 /* vertical filtering is OFF */
592 if ((ow->height < my_ov.height) & (ow->height >= 2))
594 /* vertical downscaling used AND destination height >= 2 */
595 intrep = 1;
597 else
599 intrep = 0;
602 LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
604 /* calculate inverse vertical scaling factor, taking zoom into account */
605 /* standard scaling formula: */
606 ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
608 /* correct factor to prevent lowest visible line from distorting */
609 ifactor -= (1 << 2);
610 LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
612 /* preserve ifactor for source positioning calculations later on */
613 viscalv = ifactor;
614 /* save for eng_bes_calc_move_overlay() */
615 si->overlay.v_ifactor = ifactor;
617 /* check scaling factor (and modify if needed) to be within scaling limits */
618 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
619 if (viscalv < 0x00002000)
621 /* (non-inverse) factor too large, set factor to max. valid value */
622 viscalv = 0x00002000;
623 LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
625 switch (si->ps.card_arch)
627 case NV04A:
628 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
629 * (16bit register with 0.11 format value) */
630 if (viscalv > 0x0000ffff)
632 /* (non-inverse) factor too small, set factor to min. valid value */
633 viscalv = 0x0000ffff;
634 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
636 break;
637 case NV30A:
638 case NV40A:
639 /* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
640 if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
642 /* (non-inverse) factor too small, set factor to min. valid value */
643 viscalv = (2 << 16);
644 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
646 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
647 * So let it fall through... */
648 if (si->ps.card_type != NV31) break;
649 default:
650 /* the rest has a downscaling limit of 0.125 */
651 if (viscalv > (8 << 16))
653 /* (non-inverse) factor too small, set factor to min. valid value */
654 viscalv = (8 << 16);
655 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
657 break;
659 /* AND below is required by hardware */
660 viscalv &= 0x001ffffc;
663 /********************************************************************************
664 *** setup all edges of output window, setup horizontal and vertical clipping ***
665 ********************************************************************************/
666 eng_bes_calc_move_overlay(&moi);
669 /*****************************
670 *** log color keying info ***
671 *****************************/
673 LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
674 ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
675 LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
676 ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
679 /*****************
680 *** log flags ***
681 *****************/
683 LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
684 /* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
687 /*************************************
688 *** sync to BES (Back End Scaler) ***
689 *************************************/
691 /* Done in card hardware:
692 * double buffered registers + trigger if programming complete feature. */
695 /**************************************
696 *** actually program the registers ***
697 **************************************/
699 if (si->ps.card_arch < NV10A)
701 /* unknown, but needed (otherwise high-res distortions and only half the frames */
702 BESW(NV04_OE_STATE, 0x00000000);
703 /* select buffer 0 as active (b16) */
704 BESW(NV04_SU_STATE, 0x00000000);
705 /* unknown (no effect?) */
706 BESW(NV04_RM_STATE, 0x00000000);
707 /* setup clipped(!) buffer startadress in RAM */
708 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
709 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
710 /* (program both buffers to prevent sync distortions) */
711 /* first include 'pixel precise' left clipping... (top clipping was already included) */
712 moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
713 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
714 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
715 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
716 /* setup buffer source pitch including slopspace (in bytes).
717 * Note:
718 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
719 /* (program both buffers to prevent sync distortions) */
720 BESW(NV04_0SRCPTCH, (ob->width * 2));
721 BESW(NV04_1SRCPTCH, (ob->width * 2));
722 /* setup output window position */
723 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
724 /* setup output window size */
725 BESW(NV04_DSTSIZE, (
726 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
727 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
729 /* setup horizontal and vertical scaling */
730 BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
731 /* enable vertical filtering (b0) */
732 BESW(NV04_CTRL_V, 0x00000001);
733 /* enable horizontal filtering (no effect?) */
734 BESW(NV04_CTRL_H, 0x00000111);
736 /* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */
737 BESW(NV04_GENCTRL, 0x00000111);
738 /* select buffer 1 as active (b16) */
739 BESW(NV04_SU_STATE, 0x00010000);
741 /**************************
742 *** setup color keying ***
743 **************************/
745 /* setup colorkeying */
746 switch(si->dm.space)
748 case B_RGB15_LITTLE:
749 BESW(NV04_COLKEY, (
750 ((ow->blue.value & ow->blue.mask) << 0) |
751 ((ow->green.value & ow->green.mask) << 5) |
752 ((ow->red.value & ow->red.mask) << 10) |
753 ((ow->alpha.value & ow->alpha.mask) << 15)
755 break;
756 case B_RGB16_LITTLE:
757 BESW(NV04_COLKEY, (
758 ((ow->blue.value & ow->blue.mask) << 0) |
759 ((ow->green.value & ow->green.mask) << 5) |
760 ((ow->red.value & ow->red.mask) << 11)
761 /* this space has no alpha bits */
763 break;
764 case B_CMAP8:
765 case B_RGB32_LITTLE:
766 default:
767 BESW(NV04_COLKEY, (
768 ((ow->blue.value & ow->blue.mask) << 0) |
769 ((ow->green.value & ow->green.mask) << 8) |
770 ((ow->red.value & ow->red.mask) << 16) |
771 ((ow->alpha.value & ow->alpha.mask) << 24)
773 break;
776 else
778 /* >= NV10A */
780 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
781 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
782 /* setup buffersize */
783 //fixme if needed: width must be even officially...
784 BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
785 /* setup source pitch including slopspace (in bytes),
786 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */
787 /* Note:
788 * source pitch granularity = 32 pixels on GeForce cards!! */
789 BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
790 /* setup output window position */
791 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
792 /* setup output window size */
793 BESW(NV10_0DSTSIZE, (
794 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
795 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
797 /* setup horizontal scaling */
798 BESW(NV10_0ISCALH, (hiscalv << 4));
799 /* setup vertical scaling */
800 BESW(NV10_0ISCALV, (viscalv << 4));
801 /* setup (unclipped!) buffer startadress in RAM */
802 BESW(NV10_0BUFADR, moi.a1orgv);
803 /* enable BES (b0 = 0) */
804 BESW(NV10_GENCTRL, 0x00000000);
805 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
806 /* This also triggers activation of programmed values (double buffered registers feature) */
807 BESW(NV10_BUFSEL, 0x00000001);
809 /**************************
810 *** setup color keying ***
811 **************************/
813 /* setup colorkeying */
814 switch(si->dm.space)
816 case B_RGB15_LITTLE:
817 BESW(NV10_COLKEY, (
818 ((ow->blue.value & ow->blue.mask) << 0) |
819 ((ow->green.value & ow->green.mask) << 5) |
820 ((ow->red.value & ow->red.mask) << 10) |
821 ((ow->alpha.value & ow->alpha.mask) << 15)
823 break;
824 case B_RGB16_LITTLE:
825 BESW(NV10_COLKEY, (
826 ((ow->blue.value & ow->blue.mask) << 0) |
827 ((ow->green.value & ow->green.mask) << 5) |
828 ((ow->red.value & ow->red.mask) << 11)
829 /* this space has no alpha bits */
831 break;
832 case B_CMAP8:
833 case B_RGB32_LITTLE:
834 default:
835 BESW(NV10_COLKEY, (
836 ((ow->blue.value & ow->blue.mask) << 0) |
837 ((ow->green.value & ow->green.mask) << 8) |
838 ((ow->red.value & ow->red.mask) << 16) |
839 ((ow->alpha.value & ow->alpha.mask) << 24)
841 break;
845 /* note that overlay is in use (for eng_bes_move_overlay()) */
846 si->overlay.active = true;
848 return B_OK;
851 status_t eng_release_bes()
853 if (si->ps.card_arch < NV10A)
855 /* setup BES control: disable scaler (b0 = 0) */
856 BESW(NV04_GENCTRL, 0x00000000);
858 else
860 /* setup BES control: disable scaler (b0 = 1) */
861 BESW(NV10_GENCTRL, 0x00000001);
864 /* note that overlay is not in use (for eng_bes_move_overlay()) */
865 si->overlay.active = false;
867 return B_OK;