revert 213 commits (to 56092) from the last month. 10 still need work to resolve...
[AROS.git] / workbench / libs / mesa / src / gallium / drivers / nvfx / nv04_2d.c
blobe2fadd33e1c7d81c624b57a915260d333efef9dd
1 /**************************************************************************
3 * Copyright 2009 Ben Skeggs
4 * Copyright 2009 Younes Manton
5 * Copyright 2010 Luca Barbieri
6 * All Rights Reserved.
8 * Permission is hereby granted, free of charge, to any person obtaining
9 * a copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
20 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * The above copyright notice and this permission notice (including the
26 * next paragraph) shall be included in all copies or substantial portions
27 * of the Software.
29 **************************************************************************/
31 /* this code has no Mesa or Gallium dependency and can be reused in the classic Mesa driver or DDX */
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <stdint.h>
36 #include <nouveau/nouveau_device.h>
37 #include <nouveau/nouveau_channel.h>
38 #include <nouveau/nouveau_bo.h>
39 #include <nouveau/nouveau_notifier.h>
40 #include <nouveau/nouveau_grobj.h>
41 #include <nouveau/nv04_pushbuf.h>
42 #include "nv04_2d.h"
44 #include "nouveau/nv_object.xml.h"
45 #include "nouveau/nv_m2mf.xml.h"
46 #include "nv01_2d.xml.h"
/* avoid depending on Mesa/Gallium */
#ifdef __GNUC__
/* Branch-prediction hints; the condition is normalized to 0/1 first. */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
/* No hint available: only normalize the condition to 0/1. */
#define likely(x) (!!(x))
#define unlikely(x) (!!(x))
#endif

/* Minimum / maximum of two values.
 * NOTE: the selected argument is evaluated twice, so do not pass
 * expressions with side effects. */
#define MIN2( A, B )   ( (A)<(B) ? (A) : (B) )
#define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )
/* Objects used by the 2D paths in this file: a notifier plus one graphics
 * object per engine class (2D surfaces, swizzled surface, memory-to-memory
 * copy, rectangle fill, scaled image from memory, blit). */
struct nv04_2d_context
{
	struct nouveau_notifier *ntfy;
	struct nouveau_grobj *surf2d;
	struct nouveau_grobj *swzsurf;
	struct nouveau_grobj *m2mf;
	struct nouveau_grobj *rect;
	struct nouveau_grobj *sifm;
	struct nouveau_grobj *blit;
};
/* Round value up to the next multiple of alignment.
 * The mask arithmetic is only correct when alignment is a power of two. */
static inline int
align(int value, int alignment)
{
	return (value + alignment - 1) & ~(alignment - 1);
}
/* Return non-zero when x has at most one bit set.
 * Note that 0 is also reported as a power of two. */
static inline int
util_is_pot(unsigned x)
{
	return (x & (x - 1)) == 0;
}
/* Integer base-2 logarithm, rounded towards zero.
 * Binary search over the bit position of the highest set bit; returns 0
 * for both i == 0 and i == 1. */
static inline unsigned log2i(unsigned i)
{
	unsigned r = 0;

	if (i & 0xffff0000) {
		i >>= 16;
		r += 16;
	}
	if (i & 0x0000ff00) {
		i >>= 8;
		r += 8;
	}
	if (i & 0x000000f0) {
		i >>= 4;
		r += 4;
	}
	if (i & 0x0000000c) {
		i >>= 2;
		r += 2;
	}
	if (i & 0x00000002) {
		r += 1;
	}
	return r;
}
//#define NV04_REGION_DEBUG

// Yes, we really want to inline everything, since all the functions are used only once
#if defined(__GNUC__) && !defined(DEBUG)
#define inline __attribute__((always_inline)) inline
#endif

/* Interleave the low 12 bits of x and y: bit n of x lands in result bit 2n
 * and bit n of y in result bit 2n + 1. Higher input bits are ignored. */
static inline unsigned
nv04_swizzle_bits_square(unsigned x, unsigned y)
{
	unsigned u = (x & 0x001) << 0 |
	             (x & 0x002) << 1 |
	             (x & 0x004) << 2 |
	             (x & 0x008) << 3 |
	             (x & 0x010) << 4 |
	             (x & 0x020) << 5 |
	             (x & 0x040) << 6 |
	             (x & 0x080) << 7 |
	             (x & 0x100) << 8 |
	             (x & 0x200) << 9 |
	             (x & 0x400) << 10 |
	             (x & 0x800) << 11;

	unsigned v = (y & 0x001) << 1 |
	             (y & 0x002) << 2 |
	             (y & 0x004) << 3 |
	             (y & 0x008) << 4 |
	             (y & 0x010) << 5 |
	             (y & 0x020) << 6 |
	             (y & 0x040) << 7 |
	             (y & 0x080) << 8 |
	             (y & 0x100) << 9 |
	             (y & 0x200) << 10 |
	             (y & 0x400) << 11 |
	             (y & 0x800) << 12;
	return v | u;
}
/* Swizzled pixel index of (x, y) inside a w x h 2D texture.
 * rectangular swizzled textures are linear concatenations of swizzled square tiles:
 * the tile edge is MIN2(w, h), the position inside the tile is bit-interleaved
 * and the tile index is scaled by the tile edge. A texture of height <= 1 is
 * already linear, so x is returned unchanged. */
static inline unsigned
nv04_swizzle_bits_2d(unsigned x, unsigned y, unsigned w, unsigned h)
{
	if(h <= 1)
		return x;
	else
	{
		unsigned s = MIN2(w, h);
		unsigned m = s - 1;
		return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m);
	}
}
// general 3D texture case
/* Swizzled pixel index of (x, y, z) inside a w x h x d texture.
 * With d <= 1 this is the 2D case. Otherwise one bit of x, y and z is
 * emitted per round (in that order) for as long as the corresponding
 * dimension still has bits left; the loop ends once a round emits nothing. */
static inline unsigned
nv04_swizzle_bits(unsigned x, unsigned y, unsigned z, unsigned w, unsigned h, unsigned d)
{
	if(d <= 1)
		return nv04_swizzle_bits_2d(x, y, w, h);
	else
	{
		// TODO: autogenerate code for all possible texture sizes (13 * 13 * 13 with dims <= 4096) and do a single indirect call
		unsigned v = 0;
		/* after this shift each dimension holds "bits remaining" as a non-zero value */
		w >>= 1;
		h >>= 1;
		d >>= 1;
		for(int i = 0;;)
		{
			int oldi = i;
			if(likely(w))
			{
				v |= (x & 1) << i;
				x >>= 1;
				w >>= 1;
				++i;
			}

			if(likely(h))
			{
				v |= (y & 1) << i;
				y >>= 1;
				h >>= 1;
				++i;
			}

			if(likely(d))
			{
				v |= (z & 1) << i;
				z >>= 1;
				d >>= 1;
				++i;
			}

			/* no dimension contributed a bit: all coordinates are consumed */
			if(i == oldi)
				break;
		}
		return v;
	}
}
209 unsigned
210 nv04_region_begin(struct nv04_region* rgn, unsigned w, unsigned h)
212 if(rgn->pitch)
213 return rgn->pitch * rgn->y + (rgn->x << rgn->bpps);
214 else
215 return nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d) << rgn->bpps;
218 unsigned
219 nv04_region_end(struct nv04_region* rgn, unsigned w, unsigned h)
221 if(rgn->pitch)
222 return rgn->pitch * (rgn->y + h - 1) + ((rgn->x + w) << rgn->bpps);
223 else
224 return (nv04_swizzle_bits(rgn->x + w - 1, rgn->y + h - 1, rgn->z, rgn->w, rgn->h, rgn->d) + 1) << rgn->bpps;
// *pitch = -1 -> use 3D swizzling for (x, y), *pitch = 0 -> use 2D swizzling, other *pitch -> use linear calculations
// returns 2 if pixel order is 3D-swizzled and 1 if subrect is 2D-swizzled
/* *pitch == -1 ret = 0 -> 3D swizzled subrect
 * *pitch == 0 ret = 0 -> 2D swizzled subrect
 * *pitch > 0 ret = 0 -> linear subrect
 * *pitch > 0 ret = 1 -> linear subrect, but with swizzled 3D data inside
 */
235 static inline void
236 nv04_region_print(struct nv04_region* rgn)
238 fprintf(stderr, "<%i[%i]> ", rgn->bo->handle, rgn->offset);
239 if(rgn->pitch)
240 fprintf(stderr, "lin %i", rgn->pitch);
241 else
242 fprintf(stderr, "swz %ix%ix%i", rgn->w, rgn->h, rgn->d);
243 fprintf(stderr, " (%i, %i, %i)", rgn->x, rgn->y, rgn->z);
246 static inline void
247 nv04_region_assert(struct nv04_region* rgn, unsigned w, unsigned h)
249 unsigned end = rgn->offset + nv04_region_end(rgn, w, h);
251 assert(rgn->offset <= (int)rgn->bo->size);
252 assert(end <= rgn->bo->size);
253 (void) end;
254 if(!rgn->pitch) {
255 assert(util_is_pot(rgn->w));
256 assert(util_is_pot(rgn->h));
260 /* determine if region can be linearized or fake-linearized */
261 static inline int
262 nv04_region_is_contiguous(struct nv04_region* rgn, int w, int h)
264 int surf_min;
265 int rect_min;
267 if(rgn->pitch)
268 return rgn->pitch == w << rgn->bpps;
270 // redundant, but this is the fast path for the common case
271 if(w == rgn->w && h == rgn->h && rgn->d <= 1)
272 return 1;
274 // must be POT
275 if((w & (w - 1)) || (h & (h - 1)))
276 return 0;
278 // must be aligned
279 if((rgn->x & (w - 1)) || (rgn->y & (h - 1)))
280 return 0;
282 if(rgn->d > 1)
283 return 0;
285 surf_min = MIN2(rgn->w, rgn->h);
286 rect_min = MIN2(w, h);
288 if((rect_min == surf_min) || (w == h) || (w == 2 * h))
289 return 1;
291 return 0;
294 // double the pitch until it is larger than the alignment, or the height becomes odd or 1
295 static inline void
296 nv04_region_contiguous_shape(struct nv04_region* rgn, int* w, int* h, int align)
298 while(!(*h & 1) && (*w << rgn->bpps) < (1 << align))
300 *w <<= 1;
301 *h >>= 1;
304 while((*w << rgn->bpps) > 16384 && !(*w & 1))
306 *w >>= 1;
307 *h <<= 1;
310 #ifdef NV04_REGION_DEBUG
311 fprintf(stderr, "\tCONTIGUOUS %ix%i\n", *w, *h);
312 #endif
315 static inline void
316 nv04_region_linearize_contiguous(struct nv04_region* rgn, unsigned w, unsigned h)
318 int pos;
319 if(rgn->pitch)
321 rgn->offset += rgn->y * rgn->pitch + (rgn->x << rgn->bpps);
322 rgn->x = 0;
323 rgn->y = 0;
325 else
327 rgn->offset += (rgn->w * rgn->h * rgn->z) << rgn->bpps;
328 pos = nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d);
329 rgn->x = pos & (w - 1);
330 rgn->y = pos / w;
332 rgn->pitch = w << rgn->bpps;
334 #ifdef NV04_REGION_DEBUG
335 fprintf(stderr, "\tLINEARIZE ");
336 nv04_region_print(rgn);
337 fprintf(stderr, "\n");
338 #endif
	/* preserve the offset! */
	/*
	rgn->pitch = util_format_get_stride(rgn->format, w);
	int pos = nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d);
	rgn->x = pos & (w - 1);
	rgn->y = pos & ~(w - 1);
	*/

	/*
	rgn->offset +=
	rgn->pitch = util_format_get_stride(rgn->format, w);
	rgn->x = 0;
	rgn->y = 0;
	*/
356 /* This code will get used for, and always succeed on:
357 * - 4x2 1bpp swizzled texture mipmap levels
358 * - linear regions created by linearization
360 * This code will get used for, and MAY work for:
361 * - misaligned texture blanket
362 * - linear surfaces created without wide_pitch (in this case, it will only work if we are lucky)
364 * The general case requires splitting the region in 2.
366 static inline int
367 nv04_region_do_align_offset(struct nv04_region* rgn, unsigned w, unsigned h, int shift)
369 if(rgn->pitch > 0)
371 assert(!(rgn->offset & ((1 << rgn->bpps) - 1))); // fatal!
373 if(h <= 1)
375 int delta;
376 rgn->offset += rgn->y * rgn->pitch + (rgn->x << rgn->bpps);
377 delta = rgn->offset & ((1 << shift) - 1);
378 rgn->y = 0;
379 rgn->x = delta >> rgn->bpps;
380 rgn->offset -= delta;
381 rgn->pitch = align((rgn->x + w) << rgn->bpps, 1 << shift);
383 else
385 int delta = rgn->offset & ((1 << shift) - 1);
386 int newxo = (rgn->x << rgn->bpps) + delta;
387 int dy = newxo / rgn->pitch;
388 newxo -= dy * rgn->pitch;
389 if((newxo + (w << rgn->bpps)) > rgn->pitch)
391 // TODO: split the region into two rectangles (!) if *really* necessary, unless the hardware actually supports "wrapping" rectangles
392 // this does not happen if the surface is pitch-aligned, which it should always be
393 assert(0);
394 return -1;
396 rgn->x = newxo >> rgn->bpps;
397 rgn->y += dy;
400 else
402 int size;
403 int min;
404 int v;
406 // we don't care about the alignment of 3D surfaces since the 2D engine can't use them
407 if(rgn->d < 0)
408 return -1;
410 min = MIN2(rgn->w, rgn->h);
411 size = min * min << rgn->bpps;
413 // this is unfixable, and should not be happening
414 if(rgn->offset & (size - 1))
415 return -1;
417 v = (rgn->offset & ((1 << shift) - 1)) / size;
418 rgn->offset -= v * size;
420 if(rgn->h == min)
422 unsigned w;
423 rgn->x += rgn->h * v;
424 w = rgn->w + rgn->h * v;
426 while(rgn->w < w)
427 rgn->w += rgn->w;
429 else
431 unsigned h;
432 rgn->y += rgn->w * v;
433 h = rgn->h + rgn->w * v;
435 while(rgn->h < h)
436 rgn->h += rgn->h;
440 #ifdef NV04_REGION_DEBUG
441 fprintf(stderr, "\tALIGNED ");
442 nv04_region_print(rgn);
443 fprintf(stderr, "\n");
444 #endif
445 return 0;
448 // both pitch and shift
449 // will leave the region unchanged if it fails
450 static inline int
451 nv04_region_align(struct nv04_region* rgn, unsigned w, unsigned h, int shift)
453 if(rgn->pitch & ((1 << shift) - 1))
455 if(h == 1)
456 goto do_align; /* this will fix pitch too in this case */
457 else
458 return -1;
461 if(rgn->offset & ((1 << shift) - 1))
463 do_align:
464 if(nv04_region_do_align_offset(rgn, w, h, shift))
465 return -1;
467 return 0;
470 /* this contains 22 different copy loops after preprocessing. unfortunately, it's necessary */
471 void
472 nv04_region_copy_cpu(struct nv04_region* dst, struct nv04_region* src, int w, int h)
474 uint8_t* mdst;
475 uint8_t* msrc;
476 int size;
478 if(dst->bo != src->bo)
480 nouveau_bo_map(dst->bo, NOUVEAU_BO_WR);
481 nouveau_bo_map(src->bo, NOUVEAU_BO_RD);
483 else
484 nouveau_bo_map(dst->bo, NOUVEAU_BO_WR | NOUVEAU_BO_RD);
486 mdst = (uint8_t*)dst->bo->map + dst->offset;
487 msrc = (uint8_t*)src->bo->map + src->offset;
489 size = w << dst->bpps;
491 nv04_region_assert(dst, w, h);
492 nv04_region_assert(src, w, h);
494 #ifdef NV04_REGION_DEBUG
495 fprintf(stderr, "\tRGN_COPY_CPU [%i, %i: %i] ", w, h, dst->bpps);
496 for(int i = 0; i < 2; ++i)
498 nv04_region_print(i ? src : dst);
499 fprintf(stderr, i ? "\n" : " <- ");
502 // for(int i = 0; i < 16; ++i)
503 // fprintf(stderr, "%02x ", msrc[i]);
504 // fprintf(stderr, "\n");
505 #endif
507 // TODO: support overlapping copies!
508 if(src->pitch && dst->pitch)
510 mdst += dst->y * dst->pitch + (dst->x << dst->bpps);
511 msrc += src->y * src->pitch + (src->x << src->bpps);
512 if(dst->bo != src->bo)
513 goto simple;
514 else if(mdst < msrc)
516 if(mdst + size <= msrc)
518 simple:
519 for(int iy = 0; iy < h; ++iy)
521 assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
522 assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
523 memcpy(mdst, msrc, size);
524 msrc += src->pitch; mdst += dst->pitch;
527 else
529 for(int iy = 0; iy < h; ++iy)
531 assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
532 assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
533 memmove(mdst, msrc, size);
534 msrc += src->pitch; mdst += dst->pitch;
538 else
540 /* copy backwards so we don't destroy data we have to read yet */
541 if(msrc + size <= mdst)
543 for(int iy = h - 1; iy >= 0; --iy)
545 assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
546 assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
547 memcpy(mdst, msrc, size);
548 msrc += src->pitch; mdst += dst->pitch;
551 else
553 for(int iy = h - 1; iy >= 0; --iy)
555 assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
556 assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
557 memmove(mdst, msrc, size);
558 msrc += src->pitch; mdst += dst->pitch;
563 else
565 int* dswx = NULL;
566 int* dswy = NULL;
567 int* sswx = NULL;
568 int* sswy = NULL;
569 int dir;
571 if(!dst->pitch)
573 dswx = alloca(w * sizeof(int));
574 for(int ix = 0; ix < w; ++ix) // we are adding, so z cannot be contributed by both
575 dswx[ix] = nv04_swizzle_bits(dst->x + ix, 0, 0, dst->w, dst->h, dst->d);
576 dswy = alloca(h * sizeof(int));
577 for(int iy = 0; iy < h; ++iy)
578 dswy[iy] = nv04_swizzle_bits(0, dst->y + iy, dst->z, dst->w, dst->h, dst->d);
581 if(!src->pitch)
583 sswx = alloca(w * sizeof(int));
584 for(int ix = 0; ix < w; ++ix)
585 sswx[ix] = nv04_swizzle_bits(src->x + ix, 0, 0, src->w, src->h, src->d);
586 sswy = alloca(h * sizeof(int));
587 for(int iy = 0; iy < h; ++iy)
588 sswy[iy] = nv04_swizzle_bits(0, src->y + iy, src->z, src->w, src->h, src->d);
591 dir = 1;
592 /* do backwards copies for overlapping swizzled surfaces */
593 if(dst->pitch == src->pitch && dst->offset == src->offset)
595 if(dst->y > src->y || (dst->y == src->y && dst->x > src->x))
596 dir = -1;
599 #define SWIZZLED_COPY_LOOPS
600 if(dir == 1)
602 int dir = 1;
603 #define LOOP_Y for(int iy = 0; iy < h; ++iy)
604 #define LOOP_X for(int ix = 0; ix < w; ++ix)
605 #include "nv04_2d_loops.h"
606 #undef LOOP_X
607 #undef LOOP_Y
609 else
611 int dir = -1;
612 #define LOOP_Y for(int iy = h - 1; iy >= 0; --iy)
613 #define LOOP_X for(int ix = w - 1; ix >= 0; --ix)
614 #include "nv04_2d_loops.h"
615 #undef LOOP_X
616 #undef LOOP_Y
618 #undef SWIZZLED_COPY_LOOP
621 if(src->bo != dst->bo)
622 nouveau_bo_unmap(src->bo);
623 nouveau_bo_unmap(dst->bo);
626 /* TODO: if the destination is swizzled, we are doing random writes, which causes write combining to fail
627 * the alternative is to read, modify and copy back, which may or may not be faster
628 * loading 3D textures is a common case that hits this and could probably benefit from the temporary
630 void
631 nv04_region_fill_cpu(struct nv04_region* dst, int w, int h, unsigned value)
633 uint8_t* mdst = (nouveau_bo_map(dst->bo, NOUVEAU_BO_WR), (uint8_t*)dst->bo->map + dst->offset);
635 #ifdef NV04_REGION_DEBUG
636 fprintf(stderr, "\tRGN_FILL_CPU ");
637 nv04_region_print(dst);
638 fprintf(stderr, "\n");
639 #endif
641 nv04_region_assert(dst, w, h);
643 if(dst->pitch)
645 unsigned size = w << dst->bpps;
647 #define FILL(T) do { \
648 for(int iy = 0; iy < h; ++iy) \
650 assert((char*)((T*)mdst + w) <= (char*)dst->bo->map + dst->bo->size); \
651 for(int ix = 0; ix < w; ++ix) \
652 ((T*)mdst)[ix] = (T)value; \
653 mdst += dst->pitch; \
655 } while(0)
657 mdst += dst->y * dst->pitch + (dst->x << dst->bpps);
659 if(dst->bpps == 0)
662 assert(mdst + size * h <= (uint8_t*)dst->bo->map + dst->bo->size);
663 if(size == dst->pitch)
664 memset(mdst, (uint8_t)value, size * h);
665 else
667 for(int iy = 0; iy < h; ++iy)
669 assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
670 memset(mdst, (uint8_t)value, size);
671 mdst += dst->pitch;
675 else if(dst->bpps == 1)
677 if(!((uint8_t)value ^ (uint8_t)(value >> 8)))
678 goto ms;
680 FILL(uint16_t);
682 else if(dst->bpps == 2)
684 if(value == (uint8_t)value * 0x1010101)
685 goto ms;
686 FILL(uint32_t);
688 else
689 assert(0);
690 #undef FILL
692 else
694 int* dswx;
695 int* dswy;
697 dswx = alloca(w * sizeof(int));
698 for(int ix = 0; ix < w; ++ix)
699 dswx[ix] = nv04_swizzle_bits(dst->x + ix, 0, dst->z, dst->w, dst->h, dst->d);
700 dswy = alloca(h * sizeof(int));
701 for(int iy = 0; iy < h; ++iy)
702 dswy[iy] = nv04_swizzle_bits(0, dst->y + iy, dst->z, dst->w, dst->h, dst->d);
704 #define FILL(T) do { \
705 T tvalue = (T)value; \
706 for(int iy = 0; iy < h; ++iy) \
708 T* pdst = (T*)mdst + dswy[iy]; \
709 for(int ix = 0; ix < w; ++ix) \
711 assert((uint8_t*)&pdst[dswx[ix] + 1] <= (uint8_t*)dst->bo->map + dst->bo->size); \
712 pdst[dswx[ix]] = tvalue; \
715 } while(0)
717 if(dst->bpps == 0)
718 FILL(uint8_t);
719 else if(dst->bpps == 1)
720 FILL(uint16_t);
721 else if(dst->bpps == 2)
722 FILL(uint32_t);
723 else
724 assert(0 && "unhandled bpp");
725 #undef FILL
728 nouveau_bo_unmap(dst->bo);
731 static inline int
732 nv04_region_cs2d_format(struct nv04_region* rgn)
734 switch(rgn->bpps) {
735 case 0:
736 return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
737 case 1:
738 if(rgn->one_bits >= 1)
739 return NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_X1R5G5B5;
740 else
741 return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
742 case 2:
743 if(rgn->one_bits >= 8)
744 return NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_X8R8G8B8;
745 else
746 return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8;
747 default:
748 return -1;
752 static inline int
753 nv04_region_sifm_format(struct nv04_region* rgn)
755 switch(rgn->bpps) {
756 case 0:
757 return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8;
758 case 1:
759 if(rgn->one_bits >= 1)
760 return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5;
761 else
762 return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
763 case 2:
764 if(rgn->one_bits >= 8)
765 return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
766 else
767 return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
768 default:
769 return -1;
772 static void
773 nv04_region_copy_swizzle(struct nv04_2d_context *ctx,
774 struct nv04_region* dst,
775 struct nv04_region* src,
776 int w, int h)
778 struct nouveau_channel *chan = ctx->swzsurf->channel;
779 struct nouveau_grobj *swzsurf = ctx->swzsurf;
780 struct nouveau_grobj *sifm = ctx->sifm;
781 int cs2d_format = nv04_region_cs2d_format(dst);
782 int sifm_format = nv04_region_sifm_format(src);
783 /* Max width & height may not be the same on all HW, but must be POT */
784 unsigned max_shift = 10;
785 unsigned cw = 1 << max_shift;
786 unsigned ch = 1 << max_shift;
787 unsigned sx = dst->x >> max_shift;
788 unsigned sy = dst->y >> max_shift;
789 unsigned ex = (dst->x + w - 1) >> max_shift;
790 unsigned ey = (dst->y + h - 1) >> max_shift;
791 unsigned chunks = (ex - sx + 1) * (ey - sy + 1);
792 unsigned chunk_size;
793 if(dst->w < cw)
794 cw = dst->w;
795 if(dst->h < ch)
796 ch = dst->h;
797 chunk_size = cw * ch << dst->bpps;
799 #ifdef NV04_REGION_DEBUG
800 fprintf(stderr, "\tRGN_COPY_SWIZZLE [%i, %i: %i] ", w, h, dst->bpps);
801 for(int i = 0; i < 2; ++i)
803 nv04_region_print(i ? src : dst);
804 fprintf(stderr, i ? "\n" : " <- ");
806 #endif
808 nv04_region_assert(dst, w, h);
809 nv04_region_assert(src, w, h);
811 MARK_RING (chan, 8 + chunks * 17, 2 + chunks * 2);
813 BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
814 OUT_RELOCo(chan, dst->bo,
815 NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
817 BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1);
818 OUT_RING (chan, cs2d_format |
819 log2i(cw) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U__SHIFT |
820 log2i(ch) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V__SHIFT);
822 BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
823 OUT_RELOCo(chan, src->bo,
824 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
825 BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1);
826 OUT_RING (chan, swzsurf->handle);
828 assert(!(dst->offset & 63));
830 for (int cy = sy; cy <= ey; ++cy) {
831 int ry = MAX2(0, (int)(dst->y - ch * cy));
832 int rh = MIN2((int)ch, (int)(dst->y - ch * cy + h)) - ry;
833 for (int cx = sx; cx <= ex; ++cx) {
834 int rx = MAX2(0, (int)(dst->x - cw * cx));
835 int rw = MIN2((int)cw, (int)(dst->x - cw * cx + w)) - rx;
836 unsigned dst_offset;
837 unsigned src_offset;
839 BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
841 dst_offset = dst->offset + (nv04_swizzle_bits_2d(cx * cw, cy * ch, dst->w, dst->h) << dst->bpps);
842 assert(dst_offset <= dst->bo->size);
843 assert(dst_offset + chunk_size <= dst->bo->size);
844 OUT_RELOCl(chan, dst->bo, dst_offset,
845 NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
847 BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
848 OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
849 OUT_RING (chan, sifm_format);
850 OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
851 OUT_RING (chan, rx | (ry << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y__SHIFT));
852 OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H__SHIFT | rw);
853 OUT_RING (chan, rx | (ry << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y__SHIFT));
854 OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H__SHIFT | rw);
855 OUT_RING (chan, 1 << 20);
856 OUT_RING (chan, 1 << 20);
858 BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4);
859 OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H__SHIFT | align(rw, 8));
860 OUT_RING (chan, src->pitch |
861 NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
862 NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
863 src_offset = src->offset + (cy * ch + ry + src->y - dst->y) * src->pitch + ((cx * cw + rx + src->x - dst->x) << src->bpps);
864 assert(src_offset <= src->bo->size);
865 assert(src_offset + (src->pitch * (rh - 1)) + (rw << src->bpps) <= src->bo->size);
866 OUT_RELOCl(chan, src->bo, src_offset,
867 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
868 OUT_RING (chan, 0);
873 static inline void
874 nv04_copy_m2mf_begin(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, struct nouveau_bo* srcbo, unsigned commands)
876 struct nouveau_channel *chan = ctx->m2mf->channel;
877 struct nouveau_grobj *m2mf = ctx->m2mf;
878 MARK_RING (chan, 3 + commands * 9, 2 + commands * 2);
879 BEGIN_RING(chan, m2mf, NV04_M2MF_DMA_BUFFER_IN, 2);
880 OUT_RELOCo(chan, srcbo,
881 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
882 OUT_RELOCo(chan, dstbo,
883 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
886 static inline void
887 nv04_copy_m2mf_body(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, int* pdstoff, unsigned dstpitch, struct nouveau_bo* srcbo, int* psrcoff, unsigned srcpitch, unsigned size, unsigned lines)
889 struct nouveau_channel *chan = ctx->m2mf->channel;
890 struct nouveau_grobj *m2mf = ctx->m2mf;
892 #ifdef NV04_REGION_DEBUG
893 fprintf(stderr, "\t\t\tCOPY_M2MF_BODY [%i, %i] <%i[%u]> lin %u <- <%i[%u]> lin %u\n", size, lines, dstbo->handle, *pdstoff, dstpitch, srcbo->handle, *psrcoff, srcpitch);
894 #endif
896 BEGIN_RING(chan, m2mf, NV04_M2MF_OFFSET_IN, 8);
897 OUT_RELOCl(chan, srcbo, *psrcoff,
898 NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
899 OUT_RELOCl(chan, dstbo, *pdstoff,
900 NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
901 OUT_RING (chan, srcpitch);
902 OUT_RING (chan, dstpitch);
903 OUT_RING (chan, size);
904 OUT_RING (chan, lines);
905 OUT_RING (chan, 0x0101);
906 OUT_RING (chan, 0);
908 *psrcoff += srcpitch * lines;
909 *pdstoff += dstpitch * lines;
/* Copy h rows of `size` bytes between two linear buffers with M2MF.
 * When both pitches fit the 32767-byte limit the copy is issued in batches
 * of at most 2047 rows. Otherwise every row is itself cut into 32767-byte
 * pieces that are copied as a block of "lines" (again at most 2047 per
 * transfer) plus one shorter leftover piece. */
static void
nv04_copy_m2mf(struct nv04_2d_context *ctx,
		struct nouveau_bo* dstbo, int dstoff, unsigned dstpitch,
		struct nouveau_bo* srcbo, int srcoff, unsigned srcpitch,
		unsigned size, unsigned h)
{
	unsigned max_pitch = 32767;
	unsigned max_lines = 2047;

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\t\tCOPY_M2MF [%i, %i] <%i[%i]> lin %u <- <%i[%i]> lin %u\n", size, h, dstbo->handle, dstoff, dstpitch, srcbo->handle, srcoff, srcpitch);
#endif

	if(srcpitch <= max_pitch && dstpitch <= max_pitch)
	{
		unsigned full_pages = h / max_lines;
		unsigned leftover_lines = h - full_pages * max_lines;

		nv04_copy_m2mf_begin(ctx, dstbo, srcbo, full_pages + !!leftover_lines);

		for(unsigned i = 0; i < full_pages; ++i)
			nv04_copy_m2mf_body(ctx, dstbo, &dstoff, dstpitch, srcbo, &srcoff, srcpitch, size, max_lines);

		if(leftover_lines)
			nv04_copy_m2mf_body(ctx, dstbo, &dstoff, dstpitch, srcbo, &srcoff, srcpitch, size, leftover_lines);
	}
	else
	{
		/* one row is treated as `lines` pseudo-rows of max_pitch bytes
		 * followed by `leftover` bytes */
		unsigned lines = size / max_pitch;
		unsigned leftover = size - lines * max_pitch;
		unsigned full_pages = lines / max_lines;
		unsigned leftover_lines = lines - full_pages * max_lines;
		/* bytes to skip after each row to reach the start of the next one */
		unsigned srcgap = srcpitch - size;
		unsigned dstgap = dstpitch - size;

		nv04_copy_m2mf_begin(ctx, dstbo, srcbo, h * (full_pages + !!leftover_lines + !!leftover));

		for(unsigned i = 0; i < h; ++i)
		{
			for(unsigned j = 0; j < full_pages; ++j)
				nv04_copy_m2mf_body(ctx, dstbo, &dstoff, max_pitch, srcbo, &srcoff, max_pitch, max_pitch, max_lines);

			if(leftover_lines)
				nv04_copy_m2mf_body(ctx, dstbo, &dstoff, max_pitch, srcbo, &srcoff, max_pitch, max_pitch, leftover_lines);

			if(leftover)
				nv04_copy_m2mf_body(ctx, dstbo, &dstoff, leftover, srcbo, &srcoff, leftover, leftover, 1);

			srcoff += srcgap;
			dstoff += dstgap;
		}
	}
}
/* Copy `size` bytes from srcbo + srcoff to dstbo + dstoff with M2MF,
 * expressed as a single row whose pitch equals its length. */
void
nv04_memcpy(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, int dstoff, struct nouveau_bo* srcbo, int srcoff, unsigned size)
{
#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tMEMCPY [%i] <%i[%i]> <- <%i[%i]>\n", size, dstbo->handle, dstoff, srcbo->handle, srcoff);
#endif

	nv04_copy_m2mf(ctx, dstbo, dstoff, size, srcbo, srcoff, size, size, 1);
}
976 static void
977 nv04_region_copy_m2mf(struct nv04_2d_context *ctx, struct nv04_region *dst, struct nv04_region *src, int w, int h)
979 #ifdef NV04_REGION_DEBUG
980 fprintf(stderr, "\tRGN_COPY_M2MF [%i, %i: %i] ", w, h, dst->bpps);
981 for(int i = 0; i < 2; ++i)
983 nv04_region_print(i ? src : dst);
984 fprintf(stderr, i ? "\n" : " <- ");
986 #endif
988 nv04_region_assert(dst, w, h);
989 nv04_region_assert(src, w, h);
990 assert(src->pitch);
991 assert(dst->pitch);
993 nv04_copy_m2mf(ctx,
994 dst->bo, dst->offset + dst->y * dst->pitch + (dst->x << dst->bpps), dst->pitch,
995 src->bo, src->offset + src->y * src->pitch + (src->x << src->bpps), src->pitch,
996 w << src->bpps, h);
999 static inline void
1000 nv04_region_copy_blit(struct nv04_2d_context *ctx, struct nv04_region* dst, struct nv04_region* src, int w, int h)
1002 struct nouveau_channel *chan = ctx->surf2d->channel;
1003 struct nouveau_grobj *surf2d = ctx->surf2d;
1004 struct nouveau_grobj *blit = ctx->blit;
1005 int cs2d_format = nv04_region_cs2d_format(dst);
1007 #ifdef NV04_REGION_DEBUG
1008 fprintf(stderr, "\tRGN_COPY_BLIT [%i, %i: %i] ", w, h, dst->bpps);
1009 for(int i = 0; i < 2; ++i)
1011 nv04_region_print(i ? src : dst);
1012 fprintf(stderr, i ? "\n" : " <- ");
1014 #endif
1016 assert(!(src->pitch & 63) && src->pitch);
1017 assert(!(dst->pitch & 63) && dst->pitch);
1018 nv04_region_assert(dst, w, h);
1019 nv04_region_assert(src, w, h);
1021 MARK_RING (chan, 12, 4);
1022 BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
1023 OUT_RELOCo(chan, src->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
1024 OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
1025 BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
1026 OUT_RING (chan, cs2d_format);
1027 OUT_RING (chan, (dst->pitch << 16) | src->pitch);
1028 OUT_RELOCl(chan, src->bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
1029 OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
1031 BEGIN_RING(chan, blit, 0x0300, 3);
1032 OUT_RING (chan, (src->y << 16) | src->x);
1033 OUT_RING (chan, (dst->y << 16) | dst->x);
1034 OUT_RING (chan, ( h << 16) | w);
1037 /* THEOREM: a non-linearizable swizzled destination is always 64 byte aligned, except for 4x2 mipmap levels of swizzled 1bpp surfaces
1038 * HYPOTESIS:
1039 * 1. The first mipmap level is 64-byte-aligned
1040 * PROOF:
1041 * 1. Thus, all mipmaps level with a parent which is 64-byte or more in size are.
1042 * 2. At 1bpp, the smallest levels with a <= 32-byte parent are either Nx1 or 1xN or size <=8, thus 4x2, 2x2 or 2x4
1043 * 3. Nx1, 1xN, 2x4, 2x2 have all subrects linearizable. 4x2 does not.
1044 * 4. At 2/4bpp or more, the smallest levels with a 32-byte parent are 1xN, Nx1 or 2x2
1046 * However, nv04_region_align handles that.
1049 // 0 -> done, 1 -> do with 3D engine or CPU, -1 -> do with CPU
1050 // dst and src may be modified, and the possibly modified version should be passed to nv04_region_cpu if necessary
1052 nv04_region_copy_2d(struct nv04_2d_context *ctx, struct nv04_region* dst, struct nv04_region* src,
1053 int w, int h, int dst_to_gpu, int src_on_gpu)
1055 assert(src->bpps == dst->bpps);
1057 #ifdef NV04_REGION_DEBUG
1058 fprintf(stderr, "RGN_COPY [%i, %i: %i] ", w, h, dst->bpps);
1059 for(int i = 0; i < 2; ++i)
1061 int gpu = i ? src_on_gpu : dst_to_gpu;
1062 nv04_region_print(i ? src : dst);
1063 fprintf(stderr, " %s", gpu ? "gpu" : "cpu");
1064 fprintf(stderr, i ? "\n" : " <- ");
1066 #endif
1068 // if they are contiguous and either both swizzled or both linear, reshape
1069 if(!dst->pitch == !src->pitch
1070 && nv04_region_is_contiguous(dst, w, h)
1071 && nv04_region_is_contiguous(src, w, h))
1073 nv04_region_contiguous_shape(dst, &w, &h, 6);
1074 nv04_region_linearize_contiguous(dst, w, h);
1075 nv04_region_linearize_contiguous(src, w, h);
1078 #ifdef NV04_REGION_DEBUG
1079 fprintf(stderr, "\tOPT ");
1080 for(int i = 0; i < 2; ++i)
1082 nv04_region_print(i ? src : dst);
1083 fprintf(stderr, i ? "\n" : " <- ");
1085 #endif
1087 /* if the destination is not for GPU _and_ source is on CPU, use CPU */
1088 /* if the destination is not for GPU _or_ source is on CPU, use CPU only if we think it's faster than the GPU */
1089 /* TODO: benchmark to find out in which cases exactly we should prefer the CPU */
1090 if((!dst_to_gpu && !src_on_gpu)
1091 || (!dst->pitch && dst->d > 1)
1092 /* 3D swizzled destination are unwritable by the GPU, and 2D swizzled ones are readable only by the 3D engine */
1094 return -1;
1095 /* there is no known way to read 2D/3D-swizzled surfaces with the 2D engine
1096 * ask the caller to use the 3D engine
1097 * If a format cannot be sampled from the 3D engine there is no point in making it swizzled, so we must not do so
1099 else if(!src->pitch)
1101 #ifdef NV04_REGION_DEBUG
1102 fprintf(stderr, "\tCOPY_ENG3D\n");
1103 #endif
1104 return 1;
1106 /* Setup transfer to swizzle the texture to vram if needed */
1107 else
1109 if (!dst->pitch)
1111 if(!dst_to_gpu)
1113 #ifdef NV04_REGION_DEBUG
1114 fprintf(stderr, "\tCOPY_ENG3D\n");
1115 #endif
1116 return 1;
1118 else
1120 assert(!nv04_region_align(dst, w, h, 6));
1122 nv04_region_copy_swizzle(ctx, dst, src, w, h);
1123 return 0;
1126 else
1128 /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback
1129 * to NV_M2MF in this case.
1130 * TODO: is this also true for the source? possibly not
1131 * TODO: should we just always use m2mf?
1132 * TODO: if not, add support for multiple operations to copy_blit
1135 if (!dst_to_gpu
1136 || w > 2047
1137 || h > 2047
1138 || (w & 1)
1139 || nv04_region_align(src, w, h, 6)
1140 || nv04_region_align(dst, w, h, 6)
1142 nv04_region_copy_m2mf(ctx, dst, src, w, h);
1143 else
1144 nv04_region_copy_blit(ctx, dst, src, w, h);
1146 return 0;
1151 static inline void
1152 nv04_region_fill_gdirect(struct nv04_2d_context *ctx, struct nv04_region* dst, int w, int h, unsigned value)
1154 struct nouveau_channel *chan = ctx->surf2d->channel;
1155 struct nouveau_grobj *surf2d = ctx->surf2d;
1156 struct nouveau_grobj *rect = ctx->rect;
1157 int cs2d_format, gdirect_format;
1159 #ifdef NV04_REGION_DEBUG
1160 fprintf(stderr, "\tFILL_GDIRECT\n");
1161 #endif
1163 assert(!(dst->pitch & 63) && dst->pitch);
1164 nv04_region_assert(dst, w, h);
1166 switch(dst->bpps)
1168 case 0:
1169 gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
1170 cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
1171 break;
1172 case 1:
1173 gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
1174 cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y16;
1175 break;
1176 case 2:
1177 gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
1178 cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
1179 break;
1180 default:
1181 assert(0);
1182 gdirect_format = 0;
1183 cs2d_format = 0;
1184 break;
1187 MARK_RING (chan, 15, 4);
1188 BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
1189 OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
1190 OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
1191 BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
1192 OUT_RING (chan, cs2d_format);
1193 OUT_RING (chan, (dst->pitch << 16) | dst->pitch);
1194 OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
1195 OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
1197 BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1);
1198 OUT_RING (chan, gdirect_format);
1199 BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1);
1200 OUT_RING (chan, value);
1201 BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2);
1202 OUT_RING (chan, (dst->x << 16) | dst->y);
1203 OUT_RING (chan, ( w << 16) | h);
1207 nv04_region_fill_2d(struct nv04_2d_context *ctx, struct nv04_region *dst,
1208 int w, int h, unsigned value)
1210 if(!w || !h)
1211 return 0;
1213 #ifdef NV04_REGION_DEBUG
1214 fprintf(stderr, "FILL [%i, %i: %i] ", w, h, dst->bpps);
1215 nv04_region_print(dst);
1216 fprintf(stderr, " <- 0x%x\n", value);
1217 #endif
1219 if(nv04_region_is_contiguous(dst, w, h))
1221 nv04_region_contiguous_shape(dst, &w, &h, 6);
1222 nv04_region_linearize_contiguous(dst, w, h);
1225 // TODO: maybe do intermediate copies for some cases instead of using the 3D engine/CPU
1226 /* GdiRect doesn't work together with swzsurf, so the 3D engine, or an intermediate copy, is the only option here */
1227 if(!dst->pitch)
1229 #ifdef NV04_REGION_DEBUG
1230 fprintf(stderr, "\tFILL_ENG3D\n");
1231 #endif
1232 return 1;
1234 else if(!nv04_region_align(dst, w, h, 6))
1236 nv04_region_fill_gdirect(ctx, dst, w, h, value);
1237 return 0;
1239 else
1240 return -1;
1244 void
1245 nv04_2d_context_takedown(struct nv04_2d_context *ctx)
1247 nouveau_notifier_free(&ctx->ntfy);
1248 nouveau_grobj_free(&ctx->m2mf);
1249 nouveau_grobj_free(&ctx->surf2d);
1250 nouveau_grobj_free(&ctx->swzsurf);
1251 nouveau_grobj_free(&ctx->rect);
1252 nouveau_grobj_free(&ctx->blit);
1253 nouveau_grobj_free(&ctx->sifm);
1255 free(ctx);
1258 struct nv04_2d_context *
1259 nv04_2d_context_init(struct nouveau_channel* chan)
1261 struct nv04_2d_context *ctx = calloc(1, sizeof(struct nv04_2d_context));
1262 unsigned handle = 0x88000000, class;
1263 int ret;
1265 if (!ctx)
1266 return NULL;
1268 ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy);
1269 if (ret) {
1270 nv04_2d_context_takedown(ctx);
1271 return NULL;
1274 ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf);
1275 if (ret) {
1276 nv04_2d_context_takedown(ctx);
1277 return NULL;
1280 BEGIN_RING(chan, ctx->m2mf, NV04_M2MF_DMA_NOTIFY, 1);
1281 OUT_RING (chan, ctx->ntfy->handle);
1283 if (chan->device->chipset < 0x10)
1284 class = NV04_CONTEXT_SURFACES_2D;
1285 else
1286 class = NV10_CONTEXT_SURFACES_2D;
1288 ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d);
1289 if (ret) {
1290 nv04_2d_context_takedown(ctx);
1291 return NULL;
1294 BEGIN_RING(chan, ctx->surf2d,
1295 NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
1296 OUT_RING (chan, chan->vram->handle);
1297 OUT_RING (chan, chan->vram->handle);
1299 if (chan->device->chipset < 0x10)
1300 class = NV04_IMAGE_BLIT;
1301 else
1302 class = NV11_IMAGE_BLIT;
1304 ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit);
1305 if (ret) {
1306 nv04_2d_context_takedown(ctx);
1307 return NULL;
1310 BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1);
1311 OUT_RING (chan, ctx->ntfy->handle);
1312 BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACES, 1);
1313 OUT_RING (chan, ctx->surf2d->handle);
1314 BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1);
1315 OUT_RING (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY);
1317 ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT,
1318 &ctx->rect);
1319 if (ret) {
1320 nv04_2d_context_takedown(ctx);
1321 return NULL;
1324 BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1);
1325 OUT_RING (chan, ctx->ntfy->handle);
1326 BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1);
1327 OUT_RING (chan, ctx->surf2d->handle);
1328 BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
1329 OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY);
1330 BEGIN_RING(chan, ctx->rect,
1331 NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1);
1332 OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE);
1334 switch (chan->device->chipset & 0xf0) {
1335 case 0x00:
1336 case 0x10:
1337 class = NV04_SWIZZLED_SURFACE;
1338 break;
1339 case 0x20:
1340 class = NV11_SWIZZLED_SURFACE;
1341 break;
1342 case 0x30:
1343 class = NV30_SWIZZLED_SURFACE;
1344 break;
1345 case 0x40:
1346 case 0x60:
1347 class = NV40_SWIZZLED_SURFACE;
1348 break;
1349 default:
1350 /* Famous last words: this really can't happen.. */
1351 assert(0);
1352 break;
1355 ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf);
1356 if (ret) {
1357 nv04_2d_context_takedown(ctx);
1358 return NULL;
1361 /* all the Gallium MARK_RING calculations assume no autobinding, so do that now */
1362 if(ctx->swzsurf->bound == NOUVEAU_GROBJ_UNBOUND)
1363 nouveau_grobj_autobind(ctx->swzsurf);
1365 switch (chan->device->chipset & 0xf0) {
1366 case 0x10:
1367 case 0x20:
1368 class = NV10_SCALED_IMAGE_FROM_MEMORY;
1369 break;
1370 case 0x30:
1371 class = NV30_SCALED_IMAGE_FROM_MEMORY;
1372 break;
1373 case 0x40:
1374 case 0x60:
1375 class = NV40_SCALED_IMAGE_FROM_MEMORY;
1376 break;
1377 default:
1378 class = NV04_SCALED_IMAGE_FROM_MEMORY;
1379 break;
1382 ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm);
1383 if (ret) {
1384 nv04_2d_context_takedown(ctx);
1385 return NULL;
1388 /* all the Gallium MARK_RING calculations assume no autobinding, so do that now */
1389 if(ctx->sifm->bound == NOUVEAU_GROBJ_UNBOUND)
1390 nouveau_grobj_autobind(ctx->sifm);
1392 return ctx;