/*
 * Copyright 2006-2009, Haiku, Inc. All Rights Reserved.
 * Distributed under the terms of the MIT License.
 *
 * Axel Dörfler, axeld@pinc-software.de
 *
 * The phase coefficient computation was taken from the X driver written by
 * Alan Hourihane and David Dawes.
 */
13 #include "accelerant.h"
14 #include "accelerant_protos.h"
26 //#define TRACE_OVERLAY
// Trace output. The prefix matches the one used by ERROR(), including the
// space after the colon, so the message text does not run into the prefix.
#	define TRACE(x...) _sPrintf("intel_extreme accelerant: " x)
33 #define ERROR(x...) _sPrintf("intel_extreme accelerant: " x)
34 #define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)
37 #define NUM_HORIZONTAL_TAPS 5
38 #define NUM_VERTICAL_TAPS 3
39 #define NUM_HORIZONTAL_UV_TAPS 3
40 #define NUM_VERTICAL_UV_TAPS 3
44 struct phase_coefficient
{
// Encodes `coefficient` as sign / exponent / mantissa in `splitCoefficient`.
// `coefficient` is an in/out parameter: every visible branch overwrites it
// with the value that the chosen encoding actually represents.
// `mantissaSize` is the mantissa width in bits; the stored mantissa is
// shifted left by `12 - mantissaSize`.
// NOTE(review): several lines of this function are absent from this listing
// (return type, braces, how `sign` is assigned, the comparison that finishes
// each `if`/`else if`, and whatever follows "coefficient out of range").
// Nothing is claimed here about those parts.
51 /*! Splits the coefficient floating point value into the 3 components
52 sign, mantissa, and exponent.
55 split_coefficient(double &coefficient
, int32 mantissaSize
,
56 phase_coefficient
&splitCoefficient
)
58 double absCoefficient
= fabs(coefficient
);
61 if (coefficient
< 0.0)
66 int32 intCoefficient
, res
;
// maxValue == 2^mantissaSize
67 int32 maxValue
= 1 << mantissaSize
;
68 res
= 12 - mantissaSize
;
// Finest scale: round(|coefficient| * 4 * maxValue), stored with exponent 3.
70 if ((intCoefficient
= (int)(absCoefficient
* 4 * maxValue
+ 0.5))
72 splitCoefficient
.exponent
= 3;
73 splitCoefficient
.mantissa
= intCoefficient
<< res
;
74 coefficient
= (double)intCoefficient
/ (double)(4 * maxValue
);
// Next scale: round(|coefficient| * 2 * maxValue), stored with exponent 2.
75 } else if ((intCoefficient
= (int)(absCoefficient
* 2 * maxValue
+ 0.5))
77 splitCoefficient
.exponent
= 2;
78 splitCoefficient
.mantissa
= intCoefficient
<< res
;
79 coefficient
= (double)intCoefficient
/ (double)(2 * maxValue
);
// Next scale: round(|coefficient| * maxValue), stored with exponent 1.
80 } else if ((intCoefficient
= (int)(absCoefficient
* maxValue
+ 0.5))
82 splitCoefficient
.exponent
= 1;
83 splitCoefficient
.mantissa
= intCoefficient
<< res
;
84 coefficient
= (double)intCoefficient
/ (double)maxValue
;
// Coarsest scale: round(|coefficient| * maxValue / 2), stored with exponent 0.
85 } else if ((intCoefficient
= (int)(absCoefficient
* maxValue
* 0.5 + 0.5))
87 splitCoefficient
.exponent
= 0;
88 splitCoefficient
.mantissa
= intCoefficient
<< res
;
// Note that maxValue / 2 is an integer division here.
89 coefficient
= (double)intCoefficient
/ (double)(maxValue
/ 2);
91 // coefficient out of range
95 splitCoefficient
.sign
= sign
;
// Negates the rounded value again; NOTE(review): presumably only when `sign`
// is set, but the guarding condition is not visible in this listing.
97 coefficient
= -coefficient
;
// Computes the filter coefficients for all NUM_PHASES phases of a filter
// with `taps` taps and stores them, split by split_coefficient(), in
// `splitCoefficients`. In the adjustment loop the entry for tap `t` of phase
// `i` is at index `t + i * taps`.
// `horizontal` selects the mantissa width (7 bits, otherwise 6); `isY` is
// only used together with `horizontal` to detect the vertical UV filter.
// NOTE(review): many lines of this function are absent from this listing
// (return type, braces, the definitions of `sum`, `sinc` and `pos` in the
// first loops, and the loop exit conditions); the comments below only
// describe the statements that are visible.
104 update_coefficients(int32 taps
, double filterCutOff
, bool horizontal
, bool isY
,
105 phase_coefficient
* splitCoefficients
)
// filterCutOff is compared against 1 and 3. NOTE(review): presumably it is
// clamped to that range; the assignments are not part of this listing.
107 if (filterCutOff
< 1)
109 if (filterCutOff
> 3)
// Neither horizontal nor Y, i.e. the vertical UV filter.
112 bool isVerticalUV
= !horizontal
&& !isY
;
113 int32 mantissaSize
= horizontal
? 7 : 6;
115 double rawCoefficients
[MAX_TAPS
* 32], coefficients
[NUM_PHASES
][MAX_TAPS
];
// The raw table is filled with num * 2 == taps * 32 samples.
117 int32 num
= taps
* 16;
118 for (int32 i
= 0; i
< num
* 2; i
++) {
// NOTE(review): this expression has no terminator in the listing; its
// continuation line is missing.
120 double value
= (1.0 / filterCutOff
) * taps
* M_PI
* (i
- num
)
// NOTE(review): `value` looks like it is 0 for i == num; a guard for that
// case is presumably on the missing lines above - confirm.
125 sinc
= sin(value
) / value
;
// Window that is 0 at i == 0 and 1 at i == num.
128 double window
= (0.5 - 0.5 * cos(i
* M_PI
/ num
));
129 rawCoefficients
[i
] = sinc
* window
;
132 for (int32 i
= 0; i
< NUM_PHASES
; i
++) {
133 // Normalise the coefficients
136 for (int32 j
= 0; j
< taps
; j
++) {
138 sum
+= rawCoefficients
[pos
];
// Dividing by the sum makes the taps of this phase add up to 1.
140 for (int32 j
= 0; j
< taps
; j
++) {
142 coefficients
[i
][j
] = rawCoefficients
[pos
] / sum
;
145 // split them into sign/mantissa/exponent
146 for (int32 j
= 0; j
< taps
; j
++) {
// The tap with index (taps - 1) / 2 gets two extra mantissa bits, except in
// the vertical UV filter.
149 split_coefficient(coefficients
[i
][j
], mantissaSize
150 + (((j
== (taps
- 1) / 2) && !isVerticalUV
) ? 2 : 0),
151 splitCoefficients
[pos
]);
// tapAdjust lists the taps in the order in which they are corrected below:
// the centre tap first, then alternately the taps left and right of it.
154 int32 tapAdjust
[MAX_TAPS
];
155 tapAdjust
[0] = (taps
- 1) / 2;
156 for (int32 j
= 1, k
= 1; j
<= tapAdjust
[0]; j
++, k
++) {
157 tapAdjust
[k
] = tapAdjust
[0] - j
;
158 tapAdjust
[++k
] = tapAdjust
[0] + j
;
161 // Adjust the coefficients
// split_coefficient() wrote the rounded values back into `coefficients`, so
// the sum is taken over the values that will actually be programmed.
163 for (int32 j
= 0; j
< taps
; j
++) {
164 sum
+= coefficients
[i
][j
];
168 for (int32 k
= 0; k
< taps
; k
++) {
169 int32 tap2Fix
= tapAdjust
[k
];
// Add what is missing to 1.0 to this tap, split it again, and re-sum.
// NOTE(review): the condition that ends this loop early is not visible.
170 double diff
= 1.0 - sum
;
172 coefficients
[i
][tap2Fix
] += diff
;
173 pos
= tap2Fix
+ i
* taps
;
175 split_coefficient(coefficients
[i
][tap2Fix
], mantissaSize
176 + (((tap2Fix
== (taps
- 1) / 2) && !isVerticalUV
) ? 2 : 0),
177 splitCoefficients
[pos
]);
180 for (int32 j
= 0; j
< taps
; j
++) {
181 sum
+= coefficients
[i
][j
];
// Writes the color key into the overlay register block
// (gInfo->overlay_registers): the key value of each channel, the bitwise
// complement of each channel mask, and the color key enable flag.
// NOTE(review): return type and braces are not part of this listing.
192 set_color_key(uint8 red
, uint8 green
, uint8 blue
, uint8 redMask
,
193 uint8 greenMask
, uint8 blueMask
)
195 overlay_registers
* registers
= gInfo
->overlay_registers
;
197 registers
->color_key_red
= red
;
198 registers
->color_key_green
= green
;
199 registers
->color_key_blue
= blue
;
// The mask registers receive the inverted masks.
200 registers
->color_key_mask_red
= ~redMask
;
201 registers
->color_key_mask_green
= ~greenMask
;
202 registers
->color_key_mask_blue
= ~blueMask
;
203 registers
->color_key_enabled
= true;
// Programs the color key from `window`, converting its per-channel values and
// masks according to the color space of the current display mode
// (gInfo->shared_info->current_mode.space), and forwards them to the
// set_color_key() overload taking six uint8 arguments.
// NOTE(review): the `case` labels are not part of this listing, so which
// color space selects which call cannot be confirmed from here; the last
// call is also cut off after its fifth argument.
208 set_color_key(const overlay_window
* window
)
210 switch (gInfo
->shared_info
->current_mode
.space
) {
// Only the blue value is used, with a full blue mask.
212 set_color_key(0, 0, window
->blue
.value
, 0x0, 0x0, 0xff);
// All three channels are shifted left by 3 bits.
215 set_color_key(window
->red
.value
<< 3, window
->green
.value
<< 3,
216 window
->blue
.value
<< 3, window
->red
.mask
<< 3,
217 window
->green
.mask
<< 3, window
->blue
.mask
<< 3);
// Red and blue are shifted left by 3 bits, green by 2.
220 set_color_key(window
->red
.value
<< 3, window
->green
.value
<< 2,
221 window
->blue
.value
<< 3, window
->red
.mask
<< 3,
222 window
->green
.mask
<< 2, window
->blue
.mask
<< 3);
// Values and masks are passed on unchanged.
226 set_color_key(window
->red
.value
, window
->green
.value
,
227 window
->blue
.value
, window
->red
.mask
, window
->green
.mask
,
// Queues an overlay flip (COMMAND_OVERLAY_CONTINUE) on the primary ring
// buffer, with a wait for the overlay flip event before and after it.
// `updateCoefficients` is passed through to PutOverlayFlip().
// NOTE(review): return type, braces and possibly further statements are not
// part of this listing; the final TRACE call is cut off.
235 update_overlay(bool updateCoefficients
)
// NOTE(review): presumably returns early when the overlay is not active or
// the device is in the INTEL_TYPE_965 group; the guarded statement is not
// visible.
237 if (!gInfo
->shared_info
->overlay_active
238 || gInfo
->shared_info
->device_type
.InGroup(INTEL_TYPE_965
))
241 QueueCommands
queue(gInfo
->shared_info
->primary_ring_buffer
);
243 queue
.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP
);
244 queue
.PutOverlayFlip(COMMAND_OVERLAY_CONTINUE
, updateCoefficients
);
246 // make sure the flip is done now
247 queue
.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP
);
// Debug dump of overlay related registers.
250 TRACE("%s: UP: %lx, TST: %lx, ST: %lx, CMD: %lx (%lx), ERR: %lx\n",
251 __func__
, read32(INTEL_OVERLAY_UPDATE
),
252 read32(INTEL_OVERLAY_TEST
), read32(INTEL_OVERLAY_STATUS
),
253 *(((uint32
*)gInfo
->overlay_registers
) + 0x68/4), read32(0x30168),
261 if (gInfo
->shared_info
->overlay_active
262 || gInfo
->shared_info
->device_type
.InGroup(INTEL_TYPE_965
))
265 gInfo
->shared_info
->overlay_active
= true;
266 gInfo
->overlay_registers
->overlay_enabled
= true;
268 QueueCommands
queue(gInfo
->shared_info
->primary_ring_buffer
);
269 queue
.PutOverlayFlip(COMMAND_OVERLAY_ON
, true);
272 TRACE("%s: UP: %lx, TST: %lx, ST: %lx, CMD: %lx (%lx), ERR: %lx\n",
273 __func__
, read32(INTEL_OVERLAY_UPDATE
),
274 read32(INTEL_OVERLAY_TEST
), read32(INTEL_OVERLAY_STATUS
),
275 *(((uint32
*)gInfo
->overlay_registers
) + 0x68/4),
276 read32(0x30168), read32(0x2024));
283 if (!gInfo
->shared_info
->overlay_active
284 || gInfo
->shared_info
->device_type
.InGroup(INTEL_TYPE_965
))
287 overlay_registers
* registers
= gInfo
->overlay_registers
;
289 gInfo
->shared_info
->overlay_active
= false;
290 registers
->overlay_enabled
= false;
292 QueueCommands
queue(gInfo
->shared_info
->primary_ring_buffer
);
294 // flush pending commands
296 queue
.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP
);
298 // clear overlay enabled bit
299 queue
.PutOverlayFlip(COMMAND_OVERLAY_CONTINUE
, false);
300 queue
.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP
);
302 // turn off overlay engine
303 queue
.PutOverlayFlip(COMMAND_OVERLAY_OFF
, false);
304 queue
.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP
);
306 gInfo
->current_overlay
= NULL
;
// Going by its name, reports how many overlays are available for `mode`.
// NOTE(review): the return type and the whole body except the TODO below are
// absent from this listing, so the returned value cannot be stated here.
314 intel_overlay_count(const display_mode
* mode
)
316 // TODO: make this depending on the amount of RAM and the screen mode
317 // (and we could even have more than one when using 3D as well)
// Returns a pointer to a static list of color spaces usable for overlays:
// kSupportedi965Spaces (B_YCbCr422, terminated by 0) when the device type is
// in the INTEL_TYPE_96x group, kSupportedSpaces otherwise. `mode` is not
// used by any visible statement.
// NOTE(review): the return type and the tail of the kSupportedSpaces
// initialiser are not part of this listing. Also note that this function
// tests INTEL_TYPE_96x while the other functions in this file test
// INTEL_TYPE_965 - confirm that this is intended.
323 intel_overlay_supported_spaces(const display_mode
* mode
)
325 static const uint32 kSupportedSpaces
[] = {B_RGB15
, B_RGB16
, B_RGB32
,
327 static const uint32 kSupportedi965Spaces
[] = {B_YCbCr422
, 0};
328 intel_shared_info
&sharedInfo
= *gInfo
->shared_info
;
330 if (sharedInfo
.device_type
.InGroup(INTEL_TYPE_96x
))
331 return kSupportedi965Spaces
;
333 return kSupportedSpaces
;
// Returns the overlay feature flags: color keying, horizontal and vertical
// filtering, and horizontal mirroring. The visible return expression does
// not depend on `colorSpace`.
// NOTE(review): return type and braces are not part of this listing.
338 intel_overlay_supported_features(uint32 colorSpace
)
340 return B_OVERLAY_COLOR_KEY
341 | B_OVERLAY_HORIZONTAL_FILTERING
342 | B_OVERLAY_VERTICAL_FILTERING
343 | B_OVERLAY_HORIZONTAL_MIRRORING
;
// Allocates a `struct overlay` with malloc() and graphics memory for a
// buffer of the given color space and size, and fills in the embedded
// overlay_buffer (space, width, height, bytes_per_row, buffer, buffer_dma).
// NOTE(review): part of the parameter list, the body of the color space
// switch, all error checks and all return statements are absent from this
// listing; nothing is claimed here about failure handling or the exact
// return value.
347 const overlay_buffer
*
348 intel_allocate_overlay_buffer(color_space colorSpace
, uint16 width
,
351 TRACE("%s(width %u, height %u, colorSpace %lu)\n", __func__
, width
,
354 intel_shared_info
&sharedInfo
= *gInfo
->shared_info
;
355 uint32 bytesPerPixel
;
// NOTE(review): presumably sets bytesPerPixel per color space; the cases are
// not visible.
357 switch (colorSpace
) {
374 struct overlay
* overlay
= (struct overlay
*)malloc(sizeof(struct overlay
));
380 // alloc graphics mem
// Row alignment mask; 0x3f rounds rows up to a multiple of 64 bytes below.
// NOTE(review): the statement guarded by the INTEL_TYPE_965 check (probably
// a different mask) is not visible.
382 int32 alignment
= 0x3f;
383 if (sharedInfo
.device_type
.InGroup(INTEL_TYPE_965
))
386 overlay_buffer
* buffer
= &overlay
->buffer
;
387 buffer
->space
= colorSpace
;
388 buffer
->width
= width
;
389 buffer
->height
= height
;
// Round the row size up to the next multiple of (alignment + 1).
390 buffer
->bytes_per_row
= (width
* bytesPerPixel
+ alignment
) & ~alignment
;
392 status_t status
= intel_allocate_memory(buffer
->bytes_per_row
* height
,
393 0, overlay
->buffer_base
);
// Devices in the INTEL_TYPE_965 group additionally get a state area of
// INTEL_i965_OVERLAY_STATE_SIZE bytes. NOTE(review): the buffer is freed
// again right below, presumably only if that second allocation failed - the
// condition is not visible.
399 if (sharedInfo
.device_type
.InGroup(INTEL_TYPE_965
)) {
400 status
= intel_allocate_memory(INTEL_i965_OVERLAY_STATE_SIZE
,
401 B_APERTURE_NON_RESERVED
, overlay
->state_base
);
403 intel_free_memory(overlay
->buffer_base
);
// Offsets are relative to the start of the graphics memory mapping.
408 overlay
->state_offset
= overlay
->state_base
409 - (addr_t
)gInfo
->shared_info
->graphics_memory
;
412 overlay
->buffer_offset
= overlay
->buffer_base
413 - (addr_t
)gInfo
->shared_info
->graphics_memory
;
// buffer: address of the allocation as returned by intel_allocate_memory();
// buffer_dma: the same offset applied to physical_graphics_memory.
415 buffer
->buffer
= (uint8
*)overlay
->buffer_base
;
416 buffer
->buffer_dma
= (uint8
*)gInfo
->shared_info
->physical_graphics_memory
417 + overlay
->buffer_offset
;
419 TRACE("%s: base=%x, offset=%x, address=%x, physical address=%x\n",
420 __func__
, overlay
->buffer_base
, overlay
->buffer_offset
,
421 buffer
->buffer
, buffer
->buffer_dma
);
// Releases the graphics memory belonging to `buffer`: the buffer itself and,
// for devices in the INTEL_TYPE_965 group, the overlay state area.
// `buffer` is cast to the `struct overlay` that contains it.
// NOTE(review): return type, return value, what happens when the buffer is
// the currently shown overlay, and whether the struct itself is freed are
// not visible in this listing.
428 intel_release_overlay_buffer(const overlay_buffer
* buffer
)
432 struct overlay
* overlay
= (struct overlay
*)buffer
;
// The statement guarded by this check is not part of the listing.
436 if (gInfo
->current_overlay
== overlay
)
439 intel_free_memory(overlay
->buffer_base
);
440 if (gInfo
->shared_info
->device_type
.InGroup(INTEL_TYPE_965
))
441 intel_free_memory(overlay
->state_base
);
// Fills `constraints` with the limits for views into `buffer` (scaler
// input), for on-screen windows in `mode` (scaler output), and for the
// horizontal and vertical scale factors.
// NOTE(review): return type, return value, braces and the `case` labels of
// the color space switch are not part of this listing.
449 intel_get_overlay_constraints(const display_mode
* mode
,
450 const overlay_buffer
* buffer
, overlay_constraints
* constraints
)
454 // taken from the Radeon driver...
456 // scaler input restrictions
457 // TODO: check all these values; most of them are probably too restrictive
460 constraints
->view
.h_alignment
= 0;
461 constraints
->view
.v_alignment
= 0;
// The view width alignment depends on the buffer's color space; the visible
// assignments use 7 in four branches and 3 in one.
464 switch (buffer
->space
) {
466 constraints
->view
.width_alignment
= 7;
469 constraints
->view
.width_alignment
= 7;
472 constraints
->view
.width_alignment
= 3;
475 constraints
->view
.width_alignment
= 7;
478 constraints
->view
.width_alignment
= 7;
483 constraints
->view
.height_alignment
= 0;
// A view may be at most as large as the buffer itself.
486 constraints
->view
.width
.min
= 4; // make 4-tap filter happy
487 constraints
->view
.height
.min
= 4;
488 constraints
->view
.width
.max
= buffer
->width
;
489 constraints
->view
.height
.max
= buffer
->height
;
491 // scaler output restrictions
492 constraints
->window
.h_alignment
= 0;
493 constraints
->window
.v_alignment
= 0;
494 constraints
->window
.width_alignment
= 0;
495 constraints
->window
.height_alignment
= 0;
// A window may be at most as large as the mode's virtual size.
496 constraints
->window
.width
.min
= 2;
497 constraints
->window
.width
.max
= mode
->virtual_width
;
498 constraints
->window
.height
.min
= 2;
499 constraints
->window
.height
.max
= mode
->virtual_height
;
// Scale limits: minimum 1/16, maximum 7 times the buffer dimension.
501 // TODO: the minimum values are not tested
502 constraints
->h_scale
.min
= 1.0f
/ (1 << 4);
503 constraints
->h_scale
.max
= buffer
->width
* 7;
504 constraints
->v_scale
.min
= 1.0f
/ (1 << 4);
505 constraints
->v_scale
.max
= buffer
->height
* 7;
// Claims the single overlay channel and returns a token for it: the value of
// shared_info->overlay_token after incrementing it.
// NOTE(review): return type and the statement executed when the channel is
// already taken are not part of this listing.
512 intel_allocate_overlay(void)
516 // we only have a single overlay channel
// atomic_or() sets the flag; the result is compared against 0 to detect that
// the channel was already in use.
517 if (atomic_or(&gInfo
->shared_info
->overlay_channel_used
, 1) != 0)
520 return (overlay_token
)++gInfo
->shared_info
->overlay_token
;
// Gives the overlay channel back by clearing
// shared_info->overlay_channel_used. `overlayToken` is compared with the
// current shared_info->overlay_token first.
// NOTE(review): return type, return values and the statement executed on a
// token mismatch are not part of this listing.
525 intel_release_overlay(overlay_token overlayToken
)
529 // we only have a single token, which simplifies this
530 if (overlayToken
!= (overlay_token
)gInfo
->shared_info
->overlay_token
)
// AND-ing with 0 clears the flag.
533 atomic_and(&gInfo
->shared_info
->overlay_channel_used
, 0);
// Programs the overlay registers so that `view` (a rectangle inside
// `buffer`) is shown scaled into `window`, then calls update_overlay() and
// records `buffer`'s overlay as gInfo->current_overlay.
// Window geometry, scale factors and filter coefficients are only
// reprogrammed when the overlay is not active yet or when `view`/`window`
// differ from the copies cached in gInfo.
// NOTE(review): many lines of this function are absent from this listing
// (return type, return values, braces, `case` labels, the bodies of several
// `if` statements, the definition of `pos` in the coefficient loops). The
// comments below only describe the statements that are visible.
540 intel_configure_overlay(overlay_token overlayToken
,
541 const overlay_buffer
* buffer
, const overlay_window
* window
,
542 const overlay_view
* view
)
// The token has to be the one handed out last; the statement executed on a
// mismatch is not visible.
546 if (overlayToken
!= (overlay_token
)gInfo
->shared_info
->overlay_token
)
// The body of this branch is not part of the listing.
549 if (window
== NULL
|| view
== NULL
) {
554 struct overlay
* overlay
= (struct overlay
*)buffer
;
555 overlay_registers
* registers
= gInfo
->overlay_registers
;
556 bool updateCoefficients
= false;
557 uint32 bytesPerPixel
= 2;
// Select the hardware source format matching the buffer's color space.
559 switch (buffer
->space
) {
561 registers
->source_format
= OVERLAY_FORMAT_RGB15
;
564 registers
->source_format
= OVERLAY_FORMAT_RGB16
;
567 registers
->source_format
= OVERLAY_FORMAT_RGB32
;
571 registers
->source_format
= OVERLAY_FORMAT_YCbCr422
;
// Bytewise comparison with the cached view and frame; of `window` only the
// leading sizeof(overlay_frame) bytes are compared.
575 if (!gInfo
->shared_info
->overlay_active
576 || memcmp(&gInfo
->last_overlay_view
, view
, sizeof(overlay_view
))
577 || memcmp(&gInfo
->last_overlay_frame
, window
, sizeof(overlay_frame
))) {
578 // scaling has changed, program window and scaling factor
580 // clip the window to on screen bounds
581 // TODO: this is not yet complete or correct - especially if we start
582 // to support moving the display!
583 int32 left
, top
, right
, bottom
;
584 left
= window
->h_start
;
585 right
= window
->h_start
+ window
->width
;
586 top
= window
->v_start
;
587 bottom
= window
->v_start
+ window
->height
;
// Clip the right and bottom edge to the displayed size of the current mode.
// NOTE(review): clipping of the left and top edge is not visible here.
592 if (right
> gInfo
->shared_info
->current_mode
.timing
.h_display
)
593 right
= gInfo
->shared_info
->current_mode
.timing
.h_display
;
594 if (bottom
> gInfo
->shared_info
->current_mode
.timing
.v_display
)
595 bottom
= gInfo
->shared_info
->current_mode
.timing
.v_display
;
596 if (left
>= right
|| top
>= bottom
) {
597 // overlay is not within visible bounds
602 registers
->window_left
= left
;
603 registers
->window_top
= top
;
604 registers
->window_width
= right
- left
;
605 registers
->window_height
= bottom
- top
;
// Scale factors as fixed point numbers with 12 fractional bits
// (view size / window size).
// NOTE(review): no visible check guards against window->width or
// window->height being 0 before these divisions - confirm.
607 uint32 horizontalScale
= (view
->width
<< 12) / window
->width
;
608 uint32 verticalScale
= (view
->height
<< 12) / window
->height
;
// The UV scale is half the RGB scale; the RGB scale is then set to exactly
// twice the UV scale, which clears its lowest bit.
609 uint32 horizontalScaleUV
= horizontalScale
>> 1;
610 uint32 verticalScaleUV
= verticalScale
>> 1;
611 horizontalScale
= horizontalScaleUV
<< 1;
612 verticalScale
= verticalScaleUV
<< 1;
614 // we need to offset the overlay view to adapt it to the clipping
615 // (in addition to whatever offset is desired already)
// From here on left/top/right/bottom describe the source rectangle.
616 left
= view
->h_start
- (int32
)((window
->h_start
- left
)
617 * (horizontalScale
/ 4096.0) + 0.5);
618 top
= view
->v_start
- (int32
)((window
->v_start
- top
)
619 * (verticalScale
/ 4096.0) + 0.5);
620 right
= view
->h_start
+ view
->width
;
621 bottom
= view
->v_start
+ view
->height
;
// Byte offset of the first source pixel within the buffer.
623 gInfo
->overlay_position_buffer_offset
= buffer
->bytes_per_row
* top
624 + left
* bytesPerPixel
;
626 // Note: in non-planar mode, you *must* not program the source
627 // width/height UV registers - they must stay cleared, or the chip is
628 // doing strange stuff.
629 // On the other hand, you have to program the UV scaling registers, or
630 // the result will be wrong, too.
631 registers
->source_width_rgb
= right
- left
;
632 registers
->source_height_rgb
= bottom
- top
;
// source_bytes_per_row_rgb is computed in two different ways; the first form
// is used for the INTEL_TYPE_8xx family. NOTE(review): the `else` separating
// the two forms is not part of this listing.
633 if (gInfo
->shared_info
->device_type
.InFamily(INTEL_TYPE_8xx
)) {
634 registers
->source_bytes_per_row_rgb
= (((overlay
->buffer_offset
635 + (view
->width
<< 1) + 0x1f) >> 5)
636 - (overlay
->buffer_offset
>> 5) - 1) << 2;
638 int yaddress
= overlay
->buffer_offset
;
639 int yswidth
= view
->width
<< 1;
640 registers
->source_bytes_per_row_rgb
= (((((yaddress
641 + yswidth
+ 0x3f) >> 6) - (yaddress
>> 6)) << 1) - 1) << 2;
644 // horizontal scaling
// Integer part (>> 12) and 12 bit fraction (& 0xfff) go into separate fields.
645 registers
->scale_rgb
.horizontal_downscale_factor
646 = horizontalScale
>> 12;
647 registers
->scale_rgb
.horizontal_scale_fraction
648 = horizontalScale
& 0xfff;
649 registers
->scale_uv
.horizontal_downscale_factor
650 = horizontalScaleUV
>> 12;
651 registers
->scale_uv
.horizontal_scale_fraction
652 = horizontalScaleUV
& 0xfff;
// Vertical scaling, split the same way.
655 registers
->scale_rgb
.vertical_scale_fraction
= verticalScale
& 0xfff;
656 registers
->scale_uv
.vertical_scale_fraction
= verticalScaleUV
& 0xfff;
657 registers
->vertical_scale_rgb
= verticalScale
>> 12;
658 registers
->vertical_scale_uv
= verticalScaleUV
>> 12;
660 TRACE("scale: h = %ld.%ld, v = %ld.%ld\n", horizontalScale
>> 12,
661 horizontalScale
& 0xfff, verticalScale
>> 12,
662 verticalScale
& 0xfff);
// The filter coefficients only depend on the scale factors, so they are
// recomputed only when one of them changed since the last call.
664 if (verticalScale
!= gInfo
->last_vertical_overlay_scale
665 || horizontalScale
!= gInfo
->last_horizontal_overlay_scale
) {
666 // Recompute phase coefficients (taken from X driver)
667 updateCoefficients
= true;
669 phase_coefficient coefficients
[NUM_HORIZONTAL_TAPS
* NUM_PHASES
];
670 update_coefficients(NUM_HORIZONTAL_TAPS
, horizontalScale
/ 4096.0,
671 true, true, coefficients
);
673 phase_coefficient coefficientsUV
[
674 NUM_HORIZONTAL_UV_TAPS
* NUM_PHASES
];
675 update_coefficients(NUM_HORIZONTAL_UV_TAPS
,
676 horizontalScaleUV
/ 4096.0, true, false, coefficientsUV
);
// Each register value packs the sign at bit 15, the exponent from bit 12
// upwards, and the mantissa below.
679 for (int32 i
= 0; i
< NUM_PHASES
; i
++) {
680 for (int32 j
= 0; j
< NUM_HORIZONTAL_TAPS
; j
++) {
681 registers
->horizontal_coefficients_rgb
[pos
]
682 = coefficients
[pos
].sign
<< 15
683 | coefficients
[pos
].exponent
<< 12
684 | coefficients
[pos
].mantissa
;
690 for (int32 i
= 0; i
< NUM_PHASES
; i
++) {
691 for (int32 j
= 0; j
< NUM_HORIZONTAL_UV_TAPS
; j
++) {
692 registers
->horizontal_coefficients_uv
[pos
]
693 = coefficientsUV
[pos
].sign
<< 15
694 | coefficientsUV
[pos
].exponent
<< 12
695 | coefficientsUV
[pos
].mantissa
;
700 gInfo
->last_vertical_overlay_scale
= verticalScale
;
701 gInfo
->last_horizontal_overlay_scale
= horizontalScale
;
// Remember view and frame for the change detection above.
704 gInfo
->last_overlay_view
= *view
;
705 gInfo
->last_overlay_frame
= *(overlay_frame
*)window
;
708 registers
->color_control_output_mode
= true;
709 registers
->select_pipe
= 0;
// Start address of the visible part of the buffer, and its row stride.
713 registers
->buffer_rgb0
714 = overlay
->buffer_offset
+ gInfo
->overlay_position_buffer_offset
;
715 registers
->stride_rgb
= buffer
->bytes_per_row
;
717 registers
->mirroring_mode
718 = (window
->flags
& B_OVERLAY_HORIZONTAL_MIRRORING
) != 0
719 ? OVERLAY_MIRROR_HORIZONTAL
: OVERLAY_MIRROR_NORMAL
;
720 registers
->ycbcr422_order
= 0;
722 if (!gInfo
->shared_info
->overlay_active
) {
723 // overlay is shown for the first time
724 set_color_key(window
);
727 update_overlay(updateCoefficients
);
729 gInfo
->current_overlay
= overlay
;