src/add-ons/accelerants/radeon/EngineManagment.c

   1 /*
   2         Copyright (c) 2002, Thomas Kurschel
   3
   4
   5         Part of Radeon accelerant
   6
   7         Hardware accelerator management
   8
   9         All accelerator commands go through the following steps:
  10         - accelerant adds command to CP buffer and updates CP write pointer
  11         - CP fetches command and sends it to MicroController
  12         - MicroController instructs 2D unit to execute command
  13         - 2D unit draws into 2D Destination Cache (DC)
  14         - 2D Destination Cache is drained to frame buffer
  15
  16         Whenever a token is required by BeOS, a command is queued to write
  17         the timestamp into Scratch Register 0. I haven't fully understood
  18         when and how coherency is assured by Radeon, so I assume the following:
  19         - when the timestamp is written, all previous commands have been issued,
  20           i.e. they are read and executed by the microcontroller
  21         - to make sure previously issued 2D commands have been finished,
  22           a WAIT_2D_IDLECLEAN command is inserted before the scratch register
  23           write
  24         - to flush the destination cache, a RB2D_DC_FLUSH_ALL command is
  25           issued before the wait; I hope that the wait command also waits for
  26           the flush command, but I'm not sure about that
  27
  28         The cache coherency problem remains. You can set various bits in
  29         DSTCACHE_MODE register to assure that, but first I don't really understand
  30         them, and second I'm not sure which other caches/FIFO may make trouble.
  31         Especially, Be wants to use CPU and CP accesses in parallel. Hopefully,
  32         they don't interfere.
  33
  34         I know that the PAINT_MULTI command causes trouble if you change the
  35         ROP to something else: CPU writes produce garbage in frame buffer for the
  36         next couple of accesses. Resetting the ROP to a simple copy helps, but
  37         I'm not sure what happens with concurrent CPU accesses to other areas
  38         of the frame buffer.
  39 */
  40
  41
  42 #include "radeon_accelerant.h"
  43 #include "generic.h"
  44 #include "rbbm_regs.h"
  45 #include "GlobalData.h"
  46 #include "mmio.h"
  47 #include "CP.h"
  48
  49 static engine_token radeon_engine_token = { 1, B_2D_ACCELERATION, NULL };
  50
  51 // public function: return number of hardware engine
  52 uint32 ACCELERANT_ENGINE_COUNT(void)
  53 {
  54         // hm, is there *any* card sporting more then
  55         // one hardware accelerator???
  56         return 1;
  57 }
  58
  59 // write current sync token into CP stream;
  60 // we instruct the CP to flush all kind of cache first to not interfere
  61 // with subsequent host writes
  62 static void writeSyncToken( accelerator_info *ai )
  63 {
  64         // don't write token if it hasn't changed since last write
  65         if( ai->si->engine.count == ai->si->engine.written )
  66                 return;
  67
  68         if( ai->si->acc_dma ) {
  69                 START_IB();
  70
  71                 // flush pending data
  72                 WRITE_IB_REG( RADEON_RB2D_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL );
  73
  74                 // make sure commands are finished
  75                 WRITE_IB_REG( RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN |
  76                         RADEON_WAIT_3D_IDLECLEAN | RADEON_WAIT_HOST_IDLECLEAN );
  77
  78                 // write scratch register
  79                 WRITE_IB_REG( RADEON_SCRATCH_REG0, ai->si->engine.count );
  80
  81                 ai->si->engine.written = ai->si->engine.count;
  82
  83                 SUBMIT_IB();
  84         } else {
  85                 Radeon_WaitForFifo( ai, 2 );
  86                 OUTREG( ai->regs, RADEON_RB2D_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
  87                 OUTREG( ai->regs, RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN |
  88                    RADEON_WAIT_3D_IDLECLEAN |
  89                    RADEON_WAIT_HOST_IDLECLEAN);
  90                 ai->si->engine.written = ai->si->engine.count;
  91         }
  92 }
  93
  94 // public function: acquire engine for future use
  95 //      capabilites - required 2D/3D capabilities of engine, ignored
  96 //      max_wait - maximum time we want to wait (in ms?), ignored
  97 //      st - when engine has been acquired, wait for this sync token
  98 //      et - (out) specifier of the engine acquired
  99 status_t ACQUIRE_ENGINE( uint32 capabilities, uint32 max_wait,
 100         sync_token *st, engine_token **et )
 101 {
 102         shared_info *si = ai->si;
 103
 104         SHOW_FLOW0( 4, "" );
 105
 106         (void)capabilities;
 107         (void)max_wait;
 108
 109         ACQUIRE_BEN( si->engine.lock)
 110
 111         // wait for sync
 112         if (st)
 113                 SYNC_TO_TOKEN( st );
 114
 115         *et = &radeon_engine_token;
 116         return B_OK;
 117 }
 118
 119 // public function: release accelerator
 120 //      et - engine to release
 121 //      st - (out) sync token to be filled out
 122 status_t RELEASE_ENGINE( engine_token *et, sync_token *st )
 123 {
 124         shared_info *si = ai->si;
 125
 126         SHOW_FLOW0( 4, "" );
 127
 128         // fill out sync token
 129         if (st) {
 130                 writeSyncToken( ai );
 131
 132                 st->engine_id = et->engine_id;
 133                 st->counter = si->engine.count;
 134         }
 135
 136         RELEASE_BEN( ai->si->engine.lock )
 137
 138         return B_OK;
 139 }
 140
 141 // public function: wait until engine is idle
 142 // ??? which engine to wait for? Is there anyone using this function?
 143 //     is lock hold?
 144 void WAIT_ENGINE_IDLE(void)
 145 {
 146         SHOW_FLOW0( 4, "" );
 147
 148         Radeon_WaitForIdle( ai, false );
 149 }
 150
 151 // public function: get sync token
 152 //      et - engine to wait for
 153 //      st - (out) sync token to be filled out
 154 status_t GET_SYNC_TOKEN( engine_token *et, sync_token *st )
 155 {
 156         shared_info *si = ai->si;
 157
 158         SHOW_FLOW0( 4, "" );
 159
 160         writeSyncToken( ai );
 161
 162         st->engine_id = et->engine_id;
 163         st->counter = si->engine.count;
 164
 165         SHOW_FLOW( 4, "got counter=%d", si->engine.count );
 166
 167         return B_OK;
 168 }
 169
 170 // this is the same as the corresponding kernel function
 171 void Radeon_Spin( uint32 delay )
 172 {
 173         bigtime_t start_time;
 174
 175         start_time = system_time();
 176
 177         while( system_time() - start_time < delay )
 178                 ;
 179 }
 180
 181 // public: sync to token
 182 //      st - token to wait for
 183 status_t SYNC_TO_TOKEN( sync_token *st )
 184 {
 185         shared_info *si = ai->si;
 186         bigtime_t start_time, sample_time;
 187
 188         SHOW_FLOW0( 4, "" );
 189
 190         if ( !ai->si->acc_dma )
 191         {
 192                 Radeon_WaitForFifo( ai, 64 );
 193                 Radeon_WaitForIdle( ai, false );
 194                 return B_OK;
 195         }
 196
 197         start_time = system_time();
 198
 199         while( 1 ) {
 200                 SHOW_FLOW( 4, "passed counter=%d",
 201                         ((uint32 *)(ai->mapped_memory[si->cp.feedback.mem_type].data + si->cp.feedback.scratch_mem_offset))[0] );
 202                         //si->cp.scratch.ptr[0] );
 203
 204                 // a bit nasty: counter is 64 bit, but we have 32 bit only,
 205                 // this is a tricky calculation to handle wrap-arounds correctly
 206                 if( (int32)(
 207                         ((uint32 *)(ai->mapped_memory[si->cp.feedback.mem_type].data + si->cp.feedback.scratch_mem_offset))[0]
 208                         //si->cp.scratch.ptr[0]
 209                         - st->counter) >= 0 )
 210                         return B_OK;
 211                 /*if( (int32)(INREG( ai->regs, RADEON_SCRATCH_REG0 ) - st->counter) >= 0 )
 212                         return B_OK;*/
 213
 214                 // commands have not been finished;
 215                 // this is a good time to free completed buffers as we have to
 216                 // busy-wait anyway
 217                 ACQUIRE_BEN( si->cp.lock );
 218                 Radeon_FreeIndirectBuffers( ai );
 219                 RELEASE_BEN( si->cp.lock );
 220
 221                 sample_time = system_time();
 222
 223                 if( sample_time - start_time > 100000 )
 224                         break;
 225
 226                 // use exponential fall-off
 227                 // in the beginning do busy-waiting, later on we let thread sleep
 228                 // the micro-spin is used to reduce PCI load
 229                 if( sample_time - start_time > 5000 )
 230                         snooze( (sample_time - start_time) / 10 );
 231                 else
 232                         Radeon_Spin( 1 );
 233         }
 234
 235         // we could reset engine now, but caller doesn't need to acquire
 236         // engine before calling this function, so we either reset it
 237         // without sync (ouch!) or acquire engine first and risk deadlocking
 238         SHOW_ERROR( 0, "Failed waiting for token %d (active token: %d)",
 239                 st->counter, /*INREG( ai->regs, RADEON_SCRATCH_REG0 )*/
 240                 ((uint32 *)(ai->mapped_memory[si->cp.feedback.mem_type].data + si->cp.feedback.scratch_mem_offset))[0] );
 241                 //si->cp.scratch.ptr[0] );
 242
 243         Radeon_ResetEngine( ai );
 244
 245         return B_ERROR;
 246 }