Merge pull request #5 from polachok/new
[pscnv.git] / test / mem_test.c
blobd50965175b4f48af68ea869e2725eb79db45bf2d
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 PathScale Inc. All rights reserved.
 * Use is subject to license terms.
 */
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>

#include "libpscnv.h"
#include "libpscnv_ib.h"
/* Value written to method 0x2c0 before each launch. */
#define CPREGS 4

/*
 * Byte offsets of the two kernels inside cpcode[], written to CP_START_ID.
 * The first kernel is 20 words long, so the second starts at 20 * 4 = 0x50.
 */
#define START_STRIDED 0
#define START_LINEAR 0x50

/*
 * Pre-assembled compute program holding two copy kernels. Each entry is
 * annotated with its disassembly; instructions are either one or two
 * 32-bit words long.
 */
uint32_t cpcode[] = {
	/*
	 * Kernel at START_STRIDED.
	 */

	/* mov b16 $r3l u16 s[0x2]
	 *   gets ntid.x to $r3l */
	0x11002218,
	/* add $r0 mul u16 u16 s[0xc] $r3l $r0
	 *   gets tid.x + ctaid.x * ntid.x to $r0, our loop counter
	 *   [tid.x is always at $r0 at CP start] */
	0x61062c00,
	/* set le u32 $c0 # b32 s[0x10] $r0
	 *   if size <= counter, set $c0 */
	0x3000c9fd, 0x6420c7c8,
	/* lg $c0 ret
	 *   if size <= counter, exit */
	0x30000003, 0x00000280,
	/* loop: */
	/* shl b32 $r1 $r0 0x2
	 *   counter*4 to $r1 */
	0x30020005, 0xc4100780,
	/* mov b32 $r2 g0[$r1]
	 *   read from input area */
	0xd0000209, 0x80c00780,
	/* mov b32 g1[$r1] $r2
	 *   write to output area */
	0xd0010209, 0xa0c00780,
	/* add $r0 mul u16 u16 s[0x8] $r3l $r0
	 *   counter += ntid.x * nctaid.x */
	0x60064801, 0x00200780,
	/* set gt u32 $c0 # b32 s[0x10] $r0
	 *   if size > counter, set $c0 */
	0x3000c9fd, 0x642107c8,
	/* lg $c0 bra 0x18
	 *   if size > counter, loop */
	0x10003003, 0x00000280,
	/* ret
	 *   exit program. */
	0x30000003, 0x00000780,

	/*
	 * Kernel at START_LINEAR.
	 */

	/* mov b32 $r0 b32 s[0x14]
	 *   gets stride to $r0 */
	0x1100ea00,
	/* mul $r1 u16 u16 s[0xc] u16 $r0l */
	0x41002c04,
	/* mul $r2 u16 u16 s[0xc] u16 $r0h */
	0x41012c08,
	/* add b16 $r1h $r1h $r2l
	 *   multiplies stride by ctaid.x and puts result into $r1,
	 *   our loop counter */
	0x2004060c,
	/* add b32 $r2 $r0 $r1
	 *   stride*ctaid.x + stride into $r2 */
	0x20000009, 0x04004780,
	/* min u32 $r2 b32 s[0x10] $r2
	 *   min (stride*ctaid.x + stride, size) into $r2,
	 *   the upper bound of our loop */
	0x3002c809, 0xa4200780,
	/* set ge u32 $c0 # $r1 $r2
	 *   set $c0 if counter >= max */
	0x300203fd, 0x640187c8,
	/* lg $c0 ret
	 *   finish execution if counter >= max */
	0x30000003, 0x00000280,
	/* loop: */
	/* shl b32 $r3 $r1 0x2
	 *   counter*4 to $r3 */
	0x3002020d, 0xc4100780,
	/* mov b32 $r0 g0[$r3]
	 *   read from input area */
	0xd0000601, 0x80c00780,
	/* mov b32 g1[$r3] $r0
	 *   write to output area */
	0xd0010601, 0xa0c00780,
	/* add b32 $r1 $r1 0x1
	 *   counter++ */
	0x20018205, 0x00000003,
	/* set lt u32 $c0 # $r1 $r2
	 *   set $c0 if counter < max */
	0x300203fd, 0x640047c8,
	/* lg $c0 bra 0x80
	 *   and branch back to loop if so */
	0x10010003, 0x00000280,
	/* ret
	 *   exit program. */
	0x30000003, 0x00000780,
};

/* Size of the whole compute program in bytes. */
#define CPSZ sizeof(cpcode)
/* Buffer objects shared by the tests; all three are allocated in init(). */
struct pscnv_ib_bo *in;		/* filled with 0x01 bytes before each test */
struct pscnv_ib_bo *out;	/* zeroed before each test, verified afterwards */
struct pscnv_ib_bo *cp;		/* receives a copy of cpcode[] */

/* Channel that every command in this file is submitted on. */
struct pscnv_ib_chan *chan;

/* Test parameters, set in main(). */
int bytes;	/* number of bytes to copy */
int ints;	/* bytes / sizeof(int) */
int threads;	/* threads per block used by stridetest() */
int ctas;	/* value OR-ed into the GRIDDIM word */
82 void init(int *drm_fd) {
83 int fd, err, ret;
84 int chan_id;
86 fd = drmOpen("pscnv", 0);
87 if (fd == -1) {
88 printf ("failed to open drm");
89 exit(1);
92 *drm_fd = fd;
93 err = pscnv_ib_chan_new(fd, 0, &chan, 0xdeadbeef, 0, 0);
94 if (err < 0){
95 printf ("chan: %s\n", strerror(-err));
96 exit(1);
99 ret = pscnv_obj_eng_new(fd, chan->cid, 0xdeadd00d, 0x50c0, 0);
100 if (ret != 0) {
101 printf ("tesla: %s\n", strerror(-ret));
102 exit(1);
105 if (err = pscnv_ib_bo_alloc(fd, chan->vid, 0x1, PSCNV_GEM_VRAM_SMALL | PSCNV_GEM_MAPPABLE, 0, bytes + 0x4000, 0, &in)) {
106 printf ("in: %s\n", strerror(-err));
107 exit(1);
110 if (err = pscnv_ib_bo_alloc(fd, chan->vid, 0x1, PSCNV_GEM_VRAM_SMALL | PSCNV_GEM_MAPPABLE, 0, bytes + 0x4000, 0, &out)) {
111 printf ("out: %s\n", strerror(-err));
112 exit(1);
115 if (err = pscnv_ib_bo_alloc(fd, chan->vid, 0x1, PSCNV_GEM_VRAM_SMALL | PSCNV_GEM_MAPPABLE, 0, 4096, 0, &cp)) {
116 printf ("cp: %s\n", strerror(-err));
117 exit(1);
120 memcpy(cp->map, cpcode, CPSZ);
122 /* from which offset of pushbuf, size of command and find free slot by nouveau_dma_wait and submit by nvchan_wr32(chan, 0x8c, chan->dma.ib_put); */
123 /* write batchbuffer don't forget to fire at the end */
125 /* nouveau_pushbufs_alloc */
127 BEGIN_RING50(chan, 0, 0, 1);
128 OUT_RING(chan, 0xdeadd00d);
130 // BEGIN_RING50(chan, 0, 0x180, 1); // DMA_NOTIFY
131 // OUT_RING(chan, (subc << 13) | (1 << 18) | 0x180);
132 // OUT_RING(chan, notify->handle);
134 BEGIN_RING50(chan, 0, 0x1a0, 1); // DMA_GLOBAL
135 OUT_RING(chan, 0xdeadbeef);
137 BEGIN_RING50(chan, 0, 0x1c0, 1); // DMA_CODE_CB
138 OUT_RING(chan, 0xdeadbeef);
140 BEGIN_RING50(chan, 0, 0x2b8, 1); // enable all lanes
141 OUT_RING(chan, 0x1);
142 BEGIN_RING50(chan, 0, 0x3b8, 1);
143 OUT_RING(chan, 0x2);
144 /* nouveau_pushbufs_submit */
145 FIRE_RING(chan);
149 struct timeval tvb, tve;
151 void prepare_mem(int fd) {
152 int err;
153 uint32_t* test;
155 if (err = nouveau_bo_map(in, NOUVEAU_BO_RD|NOUVEAU_BO_WR)) {
156 printf ("mapin: %s\n", strerror(-err));
157 exit(1);
159 memset (in->map, 1, bytes);
160 nouveau_bo_unmap (in);
162 memset(in->map, 1, bytes);
164 if (err = nouveau_bo_map(out, NOUVEAU_BO_RD|NOUVEAU_BO_WR)) {
165 printf ("mapout: %s\n", strerror(-err));
166 exit(1);
168 memset (out->map, 0, bytes);
169 nouveau_bo_unmap (out);
171 memset(out->map, 0, bytes);
172 gettimeofday(&tvb, 0);
175 void check_mem(int fd) {
176 int *intptr, i;
177 int err;
179 if (err = nouveau_bo_map(out, NOUVEAU_BO_RD|NOUVEAU_BO_WR)) {
180 printf ("mapout: %s\n", strerror(-err));
181 exit(1);
184 gettimeofday(&tve, 0); // we need to get the time after mapping, since it's the sync point with GPU
185 double secdiff = tve.tv_sec - tvb.tv_sec;
186 secdiff += (tve.tv_usec - tvb.tv_usec) / 1000000.0;
187 printf ("\t%fs ", secdiff);
189 intptr = malloc(bytes);
190 memcpy(intptr, out->map, bytes);
192 // intptr = out->map;
193 for (i = 0; i < ints; i++)
194 if (intptr[i] != 0x01010101) {
195 printf ("Copy failed at index %d!\n", i);
196 return;
199 // nouveau_bo_unmap (out);
200 printf ("Passed.\n");
203 void stridetest(int fd) {
204 printf ("Trying strided access... ");
205 prepare_mem(fd);
207 BEGIN_RING50(chan, 0, 0x210, 2);
208 // OUT_RELOC(chan, cp, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
209 // OUT_RELOC(chan, cp, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
210 OUT_RING(chan, cp->vm_base >> 32);
211 OUT_RING(chan, cp->vm_base);
213 BEGIN_RING50(chan, 0, 0x2b4, 1);
214 OUT_RING(chan, threads); // THREADS_PER_BLOCK
216 BEGIN_RING50(chan, 0, 0x2c0, 1);
217 OUT_RING(chan, CPREGS);
219 BEGIN_RING50(chan, 0, 0x3a4, 5);
220 OUT_RING(chan, 0x00010000 | ctas); // GRIDDIM
221 OUT_RING(chan, 0x40); // SHARED_SIZE
222 OUT_RING(chan, 0x10000 | threads); // BLOCKDIM_XY
223 OUT_RING(chan, 0x1); // BLOCKDIM_Z
224 OUT_RING(chan, 0); // CP_START_ID
226 BEGIN_RING50(chan, 0, 0x400, 5); // input segment
227 // OUT_RELOC(chan, in, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
228 // OUT_RELOC(chan, in, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
229 OUT_RING(chan, in->vm_base >> 32);
230 OUT_RING(chan, in->vm_base);
231 OUT_RING(chan, 0x00000);
232 OUT_RING(chan, 0xfffffff);
233 OUT_RING(chan, 1);
235 BEGIN_RING50(chan, 0, 0x420, 5); // output segment
236 // OUT_RELOC(chan, out, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0);
237 // OUT_RELOC(chan, out, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0);
238 OUT_RING(chan, out->vm_base >> 32);
239 OUT_RING(chan, out->vm_base);
240 OUT_RING(chan, 0x00000);
241 OUT_RING(chan, 0xfffffff);
242 OUT_RING(chan, 1);
244 BEGIN_RING50(chan, 0, 0x374, 1); // USER_PARAM_COUNT
245 OUT_RING(chan, 1 << 8);
247 BEGIN_RING50(chan, 0, 0x600, 1); // USER_PARAM
248 OUT_RING(chan, bytes/4);
250 BEGIN_RING50(chan, 0, 0x3b4, 1); // CP_START_ID
251 OUT_RING(chan, START_STRIDED);
253 BEGIN_RING50(chan, 0, 0x2f8, 1);
254 OUT_RING(chan, 1); // latch BLOCKDIM
256 BEGIN_RING50(chan, 0, 0x368, 1);
257 OUT_RING(chan, 0); // LAUNCH
259 BEGIN_RING50(chan, 0, 0x50, 1);
260 OUT_RING(chan, 0x1); // LAUNCH
262 FIRE_RING(chan);
264 while (chan->chmap[0x48/4] != 1);
266 check_mem(fd);
269 void lineartest(int fd) {
270 printf ("Trying linear access... ");
271 prepare_mem(fd);
273 BEGIN_RING50(chan, 0, 0x210, 2);
274 // OUT_RELOC(chan, cp, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
275 // OUT_RELOC(chan, cp, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
276 OUT_RING(chan, cp->vm_base >> 32);
277 OUT_RING(chan, cp->vm_base);
279 BEGIN_RING50(chan, 0, 0x2b4, 1);
280 OUT_RING(chan, 1); // THREADS_PER_BLOCK
282 BEGIN_RING50(chan, 0, 0x2c0, 1);
283 OUT_RING(chan, CPREGS);
285 BEGIN_RING50(chan, 0, 0x3a4, 5);
286 OUT_RING(chan, 0x00010000 | ctas); // GRIDDIM
287 OUT_RING(chan, 0x40); // SHARED_SIZE
288 OUT_RING(chan, 0x10001); // BLOCKDIM_XY
289 OUT_RING(chan, 0x1); // BLOCKDIM_Z
290 OUT_RING(chan, 0); // CP_START_ID
292 BEGIN_RING50(chan, 0, 0x400, 5); // input segment
293 // OUT_RELOC(chan, in, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
294 // OUT_RELOC(chan, in, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
295 OUT_RING(chan, in->vm_base >> 32);
296 OUT_RING(chan, in->vm_base);
297 OUT_RING(chan, 0x00000);
298 OUT_RING(chan, 0xfffffff);
299 OUT_RING(chan, 1);
301 BEGIN_RING50(chan, 0, 0x420, 5); // output segment
302 // OUT_RELOC(chan, out, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0);
303 // OUT_RELOC(chan, out, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0);
304 OUT_RING(chan, out->vm_base >> 32);
305 OUT_RING(chan, out->vm_base);
306 OUT_RING(chan, 0x00000);
307 OUT_RING(chan, 0xfffffff);
308 OUT_RING(chan, 1);
310 BEGIN_RING50(chan, 0, 0x374, 1); // USER_PARAM_COUNT
311 OUT_RING(chan, 2 << 8);
313 int stride = ( ints % ctas == 0 ) ? ints / ctas : (ints / ctas) + 1;
315 BEGIN_RING50(chan, 0, 0x600, 2); // USER_PARAM
316 OUT_RING(chan, bytes);
317 OUT_RING(chan, stride);
319 BEGIN_RING50(chan, 0, 0x3b4, 1); // CP_START_ID
320 OUT_RING(chan, START_LINEAR);
322 BEGIN_RING50(chan, 0, 0x2f8, 1);
323 OUT_RING(chan, 1); // latch BLOCKDIM
325 BEGIN_RING50(chan, 0, 0x368, 1);
326 OUT_RING(chan, 0); // LAUNCH
328 BEGIN_RING50(chan, 0, 0x50, 1);
329 OUT_RING(chan, 2);
331 FIRE_RING(chan);
333 while (chan->chmap[0x48/4] != 2);
335 check_mem(fd);
337 int main(int argc, char **argv) {
338 int c;
339 int fd;
340 // bytes = 1000000;
341 bytes = 245 * 4096;
342 bytes = 16 * 4096;
343 ctas = 10;
344 threads = 128;
347 while ((c = getopt (argc, argv, "s:c:t:")) != -1)
348 switch (c) {
349 case 's':
350 bytes = atoi(optarg);
351 break;
352 case 'c':
353 ctas = atoi(optarg);
354 break;
355 case 't':
356 threads = atoi(optarg);
357 break;
360 ints = bytes / sizeof(int);
362 init(&fd);
364 stridetest(fd);
365 lineartest(fd);
367 return 0;