drd/tests/matinv.c

   1 /** Compute the matrix inverse via Gauss-Jordan elimination.
   2  *  This program uses only barriers to separate computation steps but no
   3  *  mutexes. It is an example of a race-free program on which no data races
   4  *  are reported by the happens-before algorithm (drd), but a lot of data races
   5  *  (all false positives) are reported by the Eraser-algorithm (helgrind).
   6  */
   7
   8
   9 #define _GNU_SOURCE
  10
  11 /***********************/
  12 /* Include directives. */
  13 /***********************/
  14
  15 #include <assert.h>
  16 #include <math.h>
  17 #include <limits.h>  // PTHREAD_STACK_MIN
  18 #include <pthread.h>
  19 #include <stdio.h>
  20 #include <stdlib.h>
  21 #include <unistd.h>  // getopt()
  22
  23
  24 /*********************/
  25 /* Type definitions. */
  26 /*********************/
  27
  28 typedef double elem_t;
  29
  30 struct gj_threadinfo
  31 {
  32   pthread_barrier_t* b;
  33   pthread_t tid;
  34   elem_t* a;
  35   int rows;
  36   int cols;
  37   int r0;
  38   int r1;
  39 };
  40
  41
  42 /********************/
  43 /* Local variables. */
  44 /********************/
  45
  46 static int s_nthread = 1;
  47
  48
  49 /*************************/
  50 /* Function definitions. */
  51 /*************************/
  52
  53 /** Allocate memory for a matrix with the specified number of rows and
  54  *  columns.
  55  */
  56 static elem_t* new_matrix(const int rows, const int cols)
  57 {
  58   assert(rows > 0);
  59   assert(cols > 0);
  60   return malloc(rows * cols * sizeof(elem_t));
  61 }
  62
  63 /** Free the memory that was allocated for a matrix. */
  64 static void delete_matrix(elem_t* const a)
  65 {
  66   free(a);
  67 }
  68
  69 /** Fill in some numbers in a matrix. */
  70 static void init_matrix(elem_t* const a, const int rows, const int cols)
  71 {
  72   int i, j;
  73   for (i = 0; i < rows; i++)
  74   {
  75     for (j = 0; j < rows; j++)
  76     {
  77       a[i * cols + j] = 1.0 / (1 + abs(i-j));
  78     }
  79   }
  80 }
  81
  82 /** Print all elements of a matrix. */
  83 void print_matrix(const char* const label,
  84                   const elem_t* const a, const int rows, const int cols)
  85 {
  86   int i, j;
  87   printf("%s:\n", label);
  88   for (i = 0; i < rows; i++)
  89   {
  90     for (j = 0; j < cols; j++)
  91     {
  92       printf("%g ", a[i * cols + j]);
  93     }
  94     printf("\n");
  95   }
  96 }
  97
  98 /** Copy a subset of the elements of a matrix into another matrix. */
  99 static void copy_matrix(const elem_t* const from,
 100                         const int from_rows,
 101                         const int from_cols,
 102                         const int from_row_first,
 103                         const int from_row_last,
 104                         const int from_col_first,
 105                         const int from_col_last,
 106                         elem_t* const to,
 107                         const int to_rows,
 108                         const int to_cols,
 109                         const int to_row_first,
 110                         const int to_row_last,
 111                         const int to_col_first,
 112                         const int to_col_last)
 113 {
 114   int i, j;
 115
 116   assert(from_row_last - from_row_first == to_row_last - to_row_first);
 117   assert(from_col_last - from_col_first == to_col_last - to_col_first);
 118
 119   for (i = from_row_first; i < from_row_last; i++)
 120   {
 121     assert(i < from_rows);
 122     assert(i - from_row_first + to_row_first < to_rows);
 123     for (j = from_col_first; j < from_col_last; j++)
 124     {
 125       assert(j < from_cols);
 126       assert(j - from_col_first + to_col_first < to_cols);
 127       to[(i - from_row_first + to_col_first) * to_cols
 128          + (j - from_col_first + to_col_first)]
 129         = from[i * from_cols + j];
 130     }
 131   }
 132 }
 133
 134 /** Compute the matrix product of a1 and a2. */
 135 static elem_t* multiply_matrices(const elem_t* const a1,
 136                                  const int rows1,
 137                                  const int cols1,
 138                                  const elem_t* const a2,
 139                                  const int rows2,
 140                                  const int cols2)
 141 {
 142   int i, j, k;
 143   elem_t* prod;
 144
 145   assert(cols1 == rows2);
 146
 147   prod = new_matrix(rows1, cols2);
 148   for (i = 0; i < rows1; i++)
 149   {
 150     for (j = 0; j < cols2; j++)
 151     {
 152       prod[i * cols2 + j] = 0;
 153       for (k = 0; k < cols1; k++)
 154       {
 155         prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
 156       }
 157     }
 158   }
 159   return prod;
 160 }
 161
 162 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
 163  *  at row r0 and up to but not including row r1. It is assumed that as many
 164  *  threads execute this function concurrently as the count barrier p->b was
 165  *  initialized with. If the matrix p->a is nonsingular, and if matrix p->a
 166  *  has at least as many columns as rows, the result of this algorithm is that
 167  *  submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
 168  * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
 169  */
 170 static void *gj_threadfunc(void *arg)
 171 {
 172   struct gj_threadinfo* p = arg;
 173   int i, j, k;
 174   elem_t* const a = p->a;
 175   const int rows = p->rows;
 176   const int cols = p->cols;
 177   elem_t aii;
 178
 179   for (i = 0; i < p->rows; i++)
 180   {
 181     if (pthread_barrier_wait(p->b) == PTHREAD_BARRIER_SERIAL_THREAD)
 182     {
 183       // Pivoting.
 184       j = i;
 185       for (k = i + 1; k < rows; k++)
 186       {
 187         if (a[k * cols + i] > a[j * cols + i])
 188         {
 189           j = k;
 190         }
 191       }
 192       if (j != i)
 193       {
 194         for (k = 0; k < cols; k++)
 195         {
 196           const elem_t t = a[i * cols + k];
 197           a[i * cols + k] = a[j * cols + k];
 198           a[j * cols + k] = t;
 199         }
 200       }
 201       // Normalize row i.
 202       aii = a[i * cols + i];
 203       if (aii != 0)
 204         for (k = i; k < cols; k++)
 205           a[i * cols + k] /= aii;
 206     }
 207     pthread_barrier_wait(p->b);
 208     // Reduce all rows j != i.
 209     for (j = p->r0; j < p->r1; j++)
 210     {
 211       if (i != j)
 212       {
 213         const elem_t factor = a[j * cols + i];
 214         for (k = 0; k < cols; k++)
 215         {
 216           a[j * cols + k] -= a[i * cols + k] * factor;
 217         }
 218       }
 219     }
 220   }
 221   return NULL;
 222 }
 223
 224 /** Multithreaded Gauss-Jordan algorithm. */
 225 static void gj(elem_t* const a, const int rows, const int cols)
 226 {
 227   int i;
 228   struct gj_threadinfo* t;
 229   pthread_barrier_t b;
 230   pthread_attr_t attr;
 231   int err;
 232
 233   assert(rows <= cols);
 234
 235   t = malloc(sizeof(struct gj_threadinfo) * s_nthread);
 236
 237   pthread_barrier_init(&b, 0, s_nthread);
 238
 239   pthread_attr_init(&attr);
 240   err = pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN + 4096);
 241   assert(err == 0);
 242
 243   for (i = 0; i < s_nthread; i++)
 244   {
 245     t[i].b = &b;
 246     t[i].a = a;
 247     t[i].rows = rows;
 248     t[i].cols = cols;
 249     t[i].r0 = i * rows / s_nthread;
 250     t[i].r1 = (i+1) * rows / s_nthread;
 251     pthread_create(&t[i].tid, &attr, gj_threadfunc, &t[i]);
 252   }
 253
 254   pthread_attr_destroy(&attr);
 255
 256   for (i = 0; i < s_nthread; i++)
 257   {
 258     pthread_join(t[i].tid, 0);
 259   }
 260
 261   pthread_barrier_destroy(&b);
 262
 263   free(t);
 264 }
 265
 266 /** Matrix inversion via the Gauss-Jordan algorithm. */
 267 static elem_t* invert_matrix(const elem_t* const a, const int n)
 268 {
 269   int i, j;
 270   elem_t* const inv = new_matrix(n, n);
 271   elem_t* const tmp = new_matrix(n, 2*n);
 272   copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
 273   for (i = 0; i < n; i++)
 274     for (j = 0; j < n; j++)
 275       tmp[i * 2 * n + n + j] = (i == j);
 276   gj(tmp, n, 2*n);
 277   copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
 278   delete_matrix(tmp);
 279   return inv;
 280 }
 281
 282 /** Compute the average square error between the identity matrix and the
 283  * product of matrix a with its inverse matrix.
 284  */
 285 static double identity_error(const elem_t* const a, const int n)
 286 {
 287   int i, j;
 288   elem_t e = 0;
 289   for (i = 0; i < n; i++)
 290   {
 291     for (j = 0; j < n; j++)
 292     {
 293       const elem_t d = a[i * n + j] - (i == j);
 294       e += d * d;
 295     }
 296   }
 297   return sqrt(e / (n * n));
 298 }
 299
 300 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
 301  *  smallest number for which the sum of one and that number is different of
 302  *  one. It is assumed that the underlying representation of elem_t uses
 303  *  base two.
 304  */
 305 static elem_t epsilon()
 306 {
 307   elem_t eps;
 308   for (eps = 1; 1 + eps != 1; eps /= 2)
 309     ;
 310   return 2 * eps;
 311 }
 312
 313 int main(int argc, char** argv)
 314 {
 315   int matrix_size;
 316   int silent = 0;
 317   int optchar;
 318   elem_t *a, *inv, *prod;
 319   elem_t eps;
 320   double error;
 321   double ratio;
 322
 323   while ((optchar = getopt(argc, argv, "qt:")) != EOF)
 324   {
 325     switch (optchar)
 326     {
 327     case 'q': silent = 1; break;
 328     case 't': s_nthread = atoi(optarg); break;
 329     default:
 330       fprintf(stderr, "Error: unknown option '%c'.\n", optchar);
 331       return 1;
 332     }
 333   }
 334
 335   if (optind + 1 != argc)
 336   {
 337     fprintf(stderr, "Error: wrong number of arguments.\n");
 338     return 1;
 339   }
 340   matrix_size = atoi(argv[optind]);
 341
 342   /* Error checking. */
 343   assert(matrix_size >= 1);
 344   assert(s_nthread >= 1);
 345
 346   eps = epsilon();
 347   a = new_matrix(matrix_size, matrix_size);
 348   init_matrix(a, matrix_size, matrix_size);
 349   inv = invert_matrix(a, matrix_size);
 350   prod = multiply_matrices(a, matrix_size, matrix_size,
 351                            inv, matrix_size, matrix_size);
 352   error = identity_error(prod, matrix_size);
 353   ratio = error / (eps * matrix_size);
 354   if (! silent)
 355   {
 356     printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
 357            error, eps, ratio);
 358   }
 359   if (isfinite(ratio) && ratio < 100)
 360     printf("Error within bounds.\n");
 361   else
 362     printf("Error out of bounds.\n");
 363   delete_matrix(prod);
 364   delete_matrix(inv);
 365   delete_matrix(a);
 366
 367   return 0;
 368 }