drd/tests/omp_matinv.c

   1 /** Compute the matrix inverse via Gauss-Jordan elimination.
   2  *  This program uses OpenMP separate computation steps but no
   3  *  mutexes. It is an example of a race-free program on which no data races
   4  *  are reported by the happens-before algorithm (drd), but a lot of data races
   5  *  (all false positives) are reported by the Eraser-algorithm (helgrind).
   6  */
   7
   8
   9 #define _GNU_SOURCE
  10
  11 /***********************/
  12 /* Include directives. */
  13 /***********************/
  14
  15 #include <assert.h>
  16 #include <math.h>
  17 #include <omp.h>
  18 #include <stdio.h>
  19 #include <stdlib.h>
  20 #include <unistd.h>  // getopt()
  21
  22
  23 /*********************/
  24 /* Type definitions. */
  25 /*********************/
  26
  27 typedef double elem_t;
  28
  29
  30 /********************/
  31 /* Local variables. */
  32 /********************/
  33
  34 static int s_trigger_race;
  35
  36
  37 /*************************/
  38 /* Function definitions. */
  39 /*************************/
  40
  41 /** Allocate memory for a matrix with the specified number of rows and
  42  *  columns.
  43  */
  44 static elem_t* new_matrix(const int rows, const int cols)
  45 {
  46   assert(rows > 0);
  47   assert(cols > 0);
  48   return malloc(rows * cols * sizeof(elem_t));
  49 }
  50
  51 /** Free the memory that was allocated for a matrix. */
  52 static void delete_matrix(elem_t* const a)
  53 {
  54   free(a);
  55 }
  56
  57 /** Fill in some numbers in a matrix. */
  58 static void init_matrix(elem_t* const a, const int rows, const int cols)
  59 {
  60   int i, j;
  61   for (i = 0; i < rows; i++)
  62   {
  63     for (j = 0; j < rows; j++)
  64     {
  65       a[i * cols + j] = 1.0 / (1 + abs(i-j));
  66     }
  67   }
  68 }
  69
  70 /** Print all elements of a matrix. */
  71 void print_matrix(const char* const label,
  72                   const elem_t* const a, const int rows, const int cols)
  73 {
  74   int i, j;
  75   printf("%s:\n", label);
  76   for (i = 0; i < rows; i++)
  77   {
  78     for (j = 0; j < cols; j++)
  79     {
  80       printf("%g ", a[i * cols + j]);
  81     }
  82     printf("\n");
  83   }
  84 }
  85
  86 /** Copy a subset of the elements of a matrix into another matrix. */
  87 static void copy_matrix(const elem_t* const from,
  88                         const int from_rows,
  89                         const int from_cols,
  90                         const int from_row_first,
  91                         const int from_row_last,
  92                         const int from_col_first,
  93                         const int from_col_last,
  94                         elem_t* const to,
  95                         const int to_rows,
  96                         const int to_cols,
  97                         const int to_row_first,
  98                         const int to_row_last,
  99                         const int to_col_first,
 100                         const int to_col_last)
 101 {
 102   int i, j;
 103
 104   assert(from_row_last - from_row_first == to_row_last - to_row_first);
 105   assert(from_col_last - from_col_first == to_col_last - to_col_first);
 106
 107   for (i = from_row_first; i < from_row_last; i++)
 108   {
 109     assert(i < from_rows);
 110     assert(i - from_row_first + to_row_first < to_rows);
 111     for (j = from_col_first; j < from_col_last; j++)
 112     {
 113       assert(j < from_cols);
 114       assert(j - from_col_first + to_col_first < to_cols);
 115       to[(i - from_row_first + to_col_first) * to_cols
 116          + (j - from_col_first + to_col_first)]
 117         = from[i * from_cols + j];
 118     }
 119   }
 120 }
 121
 122 /** Compute the matrix product of a1 and a2. */
 123 static elem_t* multiply_matrices(const elem_t* const a1,
 124                                  const int rows1,
 125                                  const int cols1,
 126                                  const elem_t* const a2,
 127                                  const int rows2,
 128                                  const int cols2)
 129 {
 130   int i, j, k;
 131   elem_t* prod;
 132
 133   assert(cols1 == rows2);
 134
 135   prod = new_matrix(rows1, cols2);
 136   for (i = 0; i < rows1; i++)
 137   {
 138     for (j = 0; j < cols2; j++)
 139     {
 140       prod[i * cols2 + j] = 0;
 141       for (k = 0; k < cols1; k++)
 142       {
 143         prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
 144       }
 145     }
 146   }
 147   return prod;
 148 }
 149
 150 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
 151  *  at row r0 and up to but not including row r1. It is assumed that as many
 152  *  threads execute this function concurrently as the count barrier p->b was
 153  *  initialized with. If the matrix p->a is nonsingular, and if matrix p->a
 154  *  has at least as many columns as rows, the result of this algorithm is that
 155  *  submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
 156  * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
 157  */
 158 static void gj(elem_t* const a, const int rows, const int cols)
 159 {
 160   int i, j, k;
 161
 162   for (i = 0; i < rows; i++)
 163   {
 164     {
 165       // Pivoting.
 166       j = i;
 167       for (k = i + 1; k < rows; k++)
 168       {
 169         if (a[k * cols + i] > a[j * cols + i])
 170         {
 171           j = k;
 172         }
 173       }
 174       if (j != i)
 175       {
 176         for (k = 0; k < cols; k++)
 177         {
 178           const elem_t t = a[i * cols + k];
 179           a[i * cols + k] = a[j * cols + k];
 180           a[j * cols + k] = t;
 181         }
 182       }
 183       // Normalize row i.
 184       if (a[i * cols + i] != 0)
 185       {
 186         for (k = cols - 1; k >= 0; k--)
 187         {
 188           a[i * cols + k] /= a[i * cols + i];
 189         }
 190       }
 191     }
 192
 193     // Reduce all rows j != i.
 194
 195     if (s_trigger_race)
 196     {
 197 #     pragma omp parallel for private(j)
 198       for (j = 0; j < rows; j++)
 199       {
 200         if (i != j)
 201         {
 202           const elem_t factor = a[j * cols + i];
 203           for (k = 0; k < cols; k++)
 204           {
 205             a[j * cols + k] -= a[i * cols + k] * factor;
 206           }
 207         }
 208       }
 209     }
 210     else
 211     {
 212 #     pragma omp parallel for private(j, k)
 213       for (j = 0; j < rows; j++)
 214       {
 215         if (i != j)
 216         {
 217           const elem_t factor = a[j * cols + i];
 218           for (k = 0; k < cols; k++)
 219           {
 220             a[j * cols + k] -= a[i * cols + k] * factor;
 221           }
 222         }
 223       }
 224     }
 225   }
 226 }
 227
 228 /** Matrix inversion via the Gauss-Jordan algorithm. */
 229 static elem_t* invert_matrix(const elem_t* const a, const int n)
 230 {
 231   int i, j;
 232   elem_t* const inv = new_matrix(n, n);
 233   elem_t* const tmp = new_matrix(n, 2*n);
 234   copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
 235   for (i = 0; i < n; i++)
 236     for (j = 0; j < n; j++)
 237       tmp[i * 2 * n + n + j] = (i == j);
 238   gj(tmp, n, 2*n);
 239   copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
 240   delete_matrix(tmp);
 241   return inv;
 242 }
 243
 244 /** Compute the average square error between the identity matrix and the
 245  * product of matrix a with its inverse matrix.
 246  */
 247 static double identity_error(const elem_t* const a, const int n)
 248 {
 249   int i, j;
 250   elem_t e = 0;
 251   for (i = 0; i < n; i++)
 252   {
 253     for (j = 0; j < n; j++)
 254     {
 255       const elem_t d = a[i * n + j] - (i == j);
 256       e += d * d;
 257     }
 258   }
 259   return sqrt(e / (n * n));
 260 }
 261
 262 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
 263  *  smallest number for which the sum of one and that number is different of
 264  *  one. It is assumed that the underlying representation of elem_t uses
 265  *  base two.
 266  */
 267 static elem_t epsilon()
 268 {
 269   elem_t eps;
 270   for (eps = 1; 1 + eps != 1; eps /= 2)
 271     ;
 272   return 2 * eps;
 273 }
 274
 275 static void usage(const char* const exe)
 276 {
 277   printf("Usage: %s [-h] [-q] [-r] [-t<n>] <m>\n"
 278          "-h: display this information.\n"
 279          "-q: quiet mode -- do not print computed error.\n"
 280          "-r: trigger a race condition.\n"
 281          "-t<n>: use <n> threads.\n"
 282          "<m>: matrix size.\n",
 283          exe);
 284 }
 285
 286 int main(int argc, char** argv)
 287 {
 288   int matrix_size;
 289   int nthread = 1;
 290   int silent = 0;
 291   int optchar;
 292   elem_t *a, *inv, *prod;
 293   elem_t eps;
 294   double error;
 295   double ratio;
 296
 297   while ((optchar = getopt(argc, argv, "hqrt:")) != EOF)
 298   {
 299     switch (optchar)
 300     {
 301     case 'h': usage(argv[0]); return 1;
 302     case 'q': silent = 1; break;
 303     case 'r': s_trigger_race = 1; break;
 304     case 't': nthread = atoi(optarg); break;
 305     default:
 306       return 1;
 307     }
 308   }
 309
 310   if (optind + 1 != argc)
 311   {
 312     fprintf(stderr, "Error: wrong number of arguments.\n");
 313     return 1;
 314   }
 315   matrix_size = atoi(argv[optind]);
 316
 317   /* Error checking. */
 318   assert(matrix_size >= 1);
 319   assert(nthread >= 1);
 320
 321   omp_set_num_threads(nthread);
 322   omp_set_dynamic(0);
 323
 324   eps = epsilon();
 325   a = new_matrix(matrix_size, matrix_size);
 326   init_matrix(a, matrix_size, matrix_size);
 327   inv = invert_matrix(a, matrix_size);
 328   prod = multiply_matrices(a, matrix_size, matrix_size,
 329                            inv, matrix_size, matrix_size);
 330   error = identity_error(prod, matrix_size);
 331   ratio = error / (eps * matrix_size);
 332   if (! silent)
 333   {
 334     printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
 335            error, eps, ratio);
 336   }
 337   if (isfinite(ratio) && ratio < 100)
 338     printf("Error within bounds.\n");
 339   else
 340     printf("Error out of bounds.\n");
 341   delete_matrix(prod);
 342   delete_matrix(inv);
 343   delete_matrix(a);
 344
 345   return 0;
 346 }