libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c

   1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
   2 /* { dg-additional-options "-lcuda" } */
   3 /* { dg-require-effective-target openacc_cuda } */
   4
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <unistd.h>
   8 #include <openacc.h>
   9 #include <cuda.h>
  10
  11 int
  12 main (int argc, char **argv)
  13 {
  14   CUdevice dev;
  15   CUfunction delay2;
  16   CUmodule module;
  17   CUresult r;
  18   int N;
  19   int i;
  20   CUstream *streams;
  21   unsigned long **a, **d_a, *tid, ticks;
  22   int nbytes;
  23   void *kargs[3];
  24   int clkrate;
  25   int devnum, nprocs;
  26
  27   acc_init (acc_device_nvidia);
  28
  29   devnum = acc_get_device_num (acc_device_nvidia);
  30
  31   r = cuDeviceGet (&dev, devnum);
  32   if (r != CUDA_SUCCESS)
  33     {
  34       fprintf (stderr, "cuDeviceGet failed: %d\n", r);
  35       abort ();
  36     }
  37
  38   r =
  39     cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
  40                           dev);
  41   if (r != CUDA_SUCCESS)
  42     {
  43       fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
  44       abort ();
  45     }
  46
  47   r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
  48   if (r != CUDA_SUCCESS)
  49     {
  50       fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
  51       abort ();
  52     }
  53
  54   r = cuModuleLoad (&module, "subr.ptx");
  55   if (r != CUDA_SUCCESS)
  56     {
  57       fprintf (stderr, "cuModuleLoad failed: %d\n", r);
  58       abort ();
  59     }
  60
  61   r = cuModuleGetFunction (&delay2, module, "delay2");
  62   if (r != CUDA_SUCCESS)
  63     {
  64       fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
  65       abort ();
  66     }
  67
  68   nbytes = sizeof (int);
  69
  70   ticks = (unsigned long) (200.0 * clkrate);
  71
  72   N = nprocs;
  73
  74   streams = (CUstream *) malloc (N * sizeof (void *));
  75
  76   a = (unsigned long **) malloc (N * sizeof (unsigned long *));
  77   d_a = (unsigned long **) malloc (N * sizeof (unsigned long *));
  78   tid = (unsigned long *) malloc (N * sizeof (unsigned long));
  79
  80   for (i = 0; i < N; i++)
  81     {
  82       a[i] = (unsigned long *) malloc (sizeof (unsigned long));
  83       *a[i] = N;
  84       d_a[i] = (unsigned long *) acc_malloc (nbytes);
  85       tid[i] = i;
  86
  87       acc_map_data (a[i], d_a[i], nbytes);
  88
  89       streams[i] = (CUstream) acc_get_cuda_stream (i);
  90       if (streams[i] != NULL)
  91         abort ();
  92
  93       r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
  94       if (r != CUDA_SUCCESS)
  95         {
  96           fprintf (stderr, "cuStreamCreate failed: %d\n", r);
  97           abort ();
  98         }
  99
 100        if (!acc_set_cuda_stream (i, streams[i]))
 101         abort ();
 102     }
 103
 104   for (i = 0; i < N; i++)
 105     {
 106       kargs[0] = (void *) &d_a[i];
 107       kargs[1] = (void *) &ticks;
 108       kargs[2] = (void *) &tid[i];
 109
 110       r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
 111       if (r != CUDA_SUCCESS)
 112         {
 113           fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
 114           abort ();
 115         }
 116
 117       ticks = (unsigned long) (50.0 * clkrate);
 118     }
 119
 120   acc_wait_all_async (0);
 121
 122   for (i = 0; i < N; i++)
 123     {
 124       acc_memcpy_from_device (a[i], d_a[i], nbytes);
 125       if (*a[i] != i)
 126         abort ();
 127
 128       acc_unmap_data (a[i]);
 129
 130       acc_free (d_a[i]);
 131     }
 132
 133   free (streams);
 134
 135   for (i = 0; i < N; i++)
 136     {
 137       free (a[i]);
 138     }
 139
 140   free (a);
 141   free (d_a);
 142   free (tid);
 143
 144   acc_shutdown (acc_device_nvidia);
 145
 146   exit (0);
 147 }
 148
 149 /* { dg-output "" } */