2 Copyright (c) 2002, Thomas Kurschel
5 Part of Radeon accelerant
7 Hardware accelerator management
9 All accelerator commands go through the following steps:
10 - accelerant adds command to CP buffer and updates CP write pointer
11 - CP fetches command and sends it to MicroController
12 - MicroController instructs 2D unit to execute command
13 - 2D unit draws into 2D Destination Cache (DC)
14 - 2D Destination Cache is drained to frame buffer
16 Whenever a token is required by BeOS, a command is queued to write
17 the timestamp into Scratch Register 0. I haven't fully understood
18 when and how coherency is assured by Radeon, so I assume the following:
19 - when the timestamp is written, all previous commands have been issued,
20 i.e. they are read and executed by the microcontroller
21 - to make sure previously issued 2D commands have been finished,
22 a WAIT_2D_IDLECLEAN command is inserted before the scratch register
24 - to flush the destination cache, a RB2D_DC_FLUSH_ALL command is
25 issued before the wait; I hope that the wait command also waits for
26 the flush command, but I'm not sure about that
28 Remains the cache coherency problem. You can set various bits in
29 DSTCACHE_MODE register to assure that, but first I don't really understand
30 them, and second I'm not sure which other caches/FIFO may make trouble.
31 Especially, Be wants to use CPU and CP accesses in parallel. Hopefully,
34 I know that the PAINT_MULTI commands make trouble if you change the
35 ROP to something else: CPU writes produce garbage in frame buffer for the
36 next couple of accesses. Resetting the ROP to a simple copy helps, but
37 I'm not sure what happens with concurrent CPU accesses to other areas
42 #include "radeon_accelerant.h"
44 #include "rbbm_regs.h"
45 #include "GlobalData.h"
// The one engine descriptor defined in this file; ACQUIRE_ENGINE stores its
// address into the caller's *et.
// NOTE(review): the initializer order is presumably engine id, capability
// mask, opaque pointer - confirm against the engine_token declaration.
49 static engine_token radeon_engine_token
= { 1, B_2D_ACCELERATION
, NULL
};
51 // public function: return number of hardware engines
// NOTE(review): the function body (braces and return statement) is not
// visible in this excerpt - confirm the returned value against the full file.
52 uint32
ACCELERANT_ENGINE_COUNT(void)
54 // hm, is there *any* card sporting more than
55 // one hardware accelerator???
59 // write current sync token into CP stream;
60 // we instruct the CP to flush all kinds of caches first to not interfere
61 // with subsequent host writes
//	ai - accelerator whose shared info (ai->si) holds the engine.count and
//	     engine.written values used below
// NOTE(review): the block delimiters and the statement guarded by the first
// condition are not visible in this excerpt - confirm against the full file.
62 static void writeSyncToken( accelerator_info
*ai
)
64 // don't write token if it hasn't changed since last write
65 if( ai
->si
->engine
.count
== ai
->si
->engine
.written
)
// acc_dma set: the flush, the wait and the token write are all emitted
// through WRITE_IB_REG
68 if( ai
->si
->acc_dma
) {
// flush the 2D destination cache
72 WRITE_IB_REG( RADEON_RB2D_DSTCACHE_CTLSTAT
, RADEON_RB2D_DC_FLUSH_ALL
);
74 // make sure commands are finished
75 WRITE_IB_REG( RADEON_WAIT_UNTIL
, RADEON_WAIT_2D_IDLECLEAN
|
76 RADEON_WAIT_3D_IDLECLEAN
| RADEON_WAIT_HOST_IDLECLEAN
);
78 // write the current token value (engine.count) into scratch register 0
79 WRITE_IB_REG( RADEON_SCRATCH_REG0
, ai
->si
->engine
.count
);
// remember which token value has been written
81 ai
->si
->engine
.written
= ai
->si
->engine
.count
;
// NOTE(review): presumably the non-DMA branch (the else keyword is not
// visible here): the same flush and wait values are written directly with
// OUTREG after Radeon_WaitForFifo( ai, 2 ). No write to RADEON_SCRATCH_REG0
// is visible on this path although engine.written is still updated -
// verify that this is intended.
85 Radeon_WaitForFifo( ai
, 2 );
86 OUTREG( ai
->regs
, RADEON_RB2D_DSTCACHE_CTLSTAT
, RADEON_RB2D_DC_FLUSH_ALL
);
87 OUTREG( ai
->regs
, RADEON_WAIT_UNTIL
, RADEON_WAIT_2D_IDLECLEAN
|
88 RADEON_WAIT_3D_IDLECLEAN
|
89 RADEON_WAIT_HOST_IDLECLEAN
);
90 ai
->si
->engine
.written
= ai
->si
->engine
.count
;
94 // public function: acquire engine for future use
95 // capabilities - required 2D/3D capabilities of engine, ignored
96 // max_wait - maximum time we want to wait (in ms?), ignored
97 // st - when engine has been acquired, wait for this sync token
98 // et - (out) specifier of the engine acquired
// Visible effects: takes si->engine.lock via ACQUIRE_BEN and stores the
// address of radeon_engine_token in *et.
// NOTE(review): ai is neither a parameter nor a visible local - presumably
// a file-level/global accelerator_info pointer. The handling of st and the
// return statement are not visible in this excerpt.
99 status_t
ACQUIRE_ENGINE( uint32 capabilities
, uint32 max_wait
,
100 sync_token
*st
, engine_token
**et
)
102 shared_info
*si
= ai
->si
;
// the lock taken here is the one RELEASE_ENGINE gives back with RELEASE_BEN
109 ACQUIRE_BEN( si
->engine
.lock
)
// hand out the file's single engine descriptor
115 *et
= &radeon_engine_token
;
119 // public function: release accelerator
120 // et - engine to release
121 // st - (out) sync token to be filled out
// Visible effects: calls writeSyncToken( ai ), copies et->engine_id and
// si->engine.count into *st, then releases ai->si->engine.lock.
// NOTE(review): any check of st against NULL and the return statement are
// not visible in this excerpt - confirm against the full file.
122 status_t
RELEASE_ENGINE( engine_token
*et
, sync_token
*st
)
124 shared_info
*si
= ai
->si
;
128 // fill out sync token
130 writeSyncToken( ai
);
132 st
->engine_id
= et
->engine_id
;
133 st
->counter
= si
->engine
.count
;
// give back the engine lock (same lock object ACQUIRE_ENGINE takes)
136 RELEASE_BEN( ai
->si
->engine
.lock
)
141 // public function: wait until engine is idle
142 // ??? which engine to wait for? Is there anyone using this function?
// Delegates to Radeon_WaitForIdle for the accelerator referenced by ai.
// NOTE(review): the meaning of the second argument (false) is defined by
// Radeon_WaitForIdle, which is not visible here.
144 void WAIT_ENGINE_IDLE(void)
148 Radeon_WaitForIdle( ai
, false );
151 // public function: get sync token
152 // et - engine to wait for
153 // st - (out) sync token to be filled out
// Visible effects: calls writeSyncToken( ai ), then copies et->engine_id
// and si->engine.count into *st and logs the counter via SHOW_FLOW.
// NOTE(review): the return statement is not visible in this excerpt.
154 status_t
GET_SYNC_TOKEN( engine_token
*et
, sync_token
*st
)
156 shared_info
*si
= ai
->si
;
// make sure the current token value is written before reporting it
160 writeSyncToken( ai
);
162 st
->engine_id
= et
->engine_id
;
163 st
->counter
= si
->engine
.count
;
165 SHOW_FLOW( 4, "got counter=%d", si
->engine
.count
);
170 // this is the same as the corresponding kernel function
//	delay - time span compared against the elapsed system_time()
// NOTE(review): delay is presumably in the same unit system_time() reports
// (microseconds on BeOS) - confirm. The loop body is not visible in this
// excerpt; presumably it is an empty busy-wait.
171 void Radeon_Spin( uint32 delay
)
173 bigtime_t start_time
;
175 start_time
= system_time();
// loop for as long as less than delay has elapsed since start_time
177 while( system_time() - start_time
< delay
)
181 // public: sync to token
182 // st - token to wait for
// Visible flow:
// - if ai->si->acc_dma is not set, Radeon_WaitForFifo( ai, 64 ) and
//   Radeon_WaitForIdle( ai, false ) are called
// - otherwise the first uint32 at the feedback scratch location
//   (mapped_memory[feedback.mem_type].data + feedback.scratch_mem_offset)
//   is compared against st->counter; while the token has not been reached,
//   completed indirect buffers are freed under si->cp.lock and the elapsed
//   time is checked against the 100000 and 5000 thresholds; past 5000 the
//   thread snoozes for one tenth of the elapsed time
// - the tail reports the failure via SHOW_ERROR and calls Radeon_ResetEngine
// NOTE(review): the loop construct, the return statements and the bodies of
// several conditions are not visible in this excerpt - confirm against the
// full file before relying on this summary.
183 status_t
SYNC_TO_TOKEN( sync_token
*st
)
185 shared_info
*si
= ai
->si
;
186 bigtime_t start_time
, sample_time
;
// register (non-DMA) mode: wait on the FIFO and on engine idle
190 if ( !ai
->si
->acc_dma
)
192 Radeon_WaitForFifo( ai
, 64 );
193 Radeon_WaitForIdle( ai
, false );
// reference point for the elapsed-time checks further down
197 start_time
= system_time();
// log the token value currently stored at the feedback scratch location
200 SHOW_FLOW( 4, "passed counter=%d",
201 ((uint32
*)(ai
->mapped_memory
[si
->cp
.feedback
.mem_type
].data
+ si
->cp
.feedback
.scratch_mem_offset
))[0] );
202 //si->cp.scratch.ptr[0] );
204 // a bit nasty: counter is 64 bit, but we have 32 bit only,
205 // this is a tricky calculation to handle wrap-arounds correctly
// the difference (current scratch value - st->counter) is tested for >= 0
// NOTE(review): the start of this expression (presumably a signed 32 bit
// cast inside an if) is not visible in this excerpt.
207 ((uint32
*)(ai
->mapped_memory
[si
->cp
.feedback
.mem_type
].data
+ si
->cp
.feedback
.scratch_mem_offset
))[0]
208 //si->cp.scratch.ptr[0]
209 - st
->counter
) >= 0 )
211 /*if( (int32)(INREG( ai->regs, RADEON_SCRATCH_REG0 ) - st->counter) >= 0 )
214 // commands have not been finished;
215 // this is a good time to free completed buffers as we have to
217 ACQUIRE_BEN( si
->cp
.lock
);
218 Radeon_FreeIndirectBuffers( ai
);
219 RELEASE_BEN( si
->cp
.lock
);
221 sample_time
= system_time();
223 if( sample_time
- start_time
> 100000 )
226 // use exponential fall-off
227 // in the beginning do busy-waiting, later on we let thread sleep
228 // the micro-spin is used to reduce PCI load
229 if( sample_time
- start_time
> 5000 )
230 snooze( (sample_time
- start_time
) / 10 );
235 // we could reset engine now, but caller doesn't need to acquire
236 // engine before calling this function, so we either reset it
237 // without sync (ouch!) or acquire engine first and risk deadlocking
238 SHOW_ERROR( 0, "Failed waiting for token %d (active token: %d)",
239 st
->counter
, /*INREG( ai->regs, RADEON_SCRATCH_REG0 )*/
240 ((uint32
*)(ai
->mapped_memory
[si
->cp
.feedback
.mem_type
].data
+ si
->cp
.feedback
.scratch_mem_offset
))[0] );
241 //si->cp.scratch.ptr[0] );
// last resort after the failed wait: reset the engine
243 Radeon_ResetEngine( ai
);