drd: Add a consistency check
[valgrind.git] / drd / tests / omp_matinv.c
blobcd402ab0f4e9f3917bdf9cb74381e1946137bc74
1 /** Compute the matrix inverse via Gauss-Jordan elimination.
2 * This program uses OpenMP separate computation steps but no
3 * mutexes. It is an example of a race-free program on which no data races
4 * are reported by the happens-before algorithm (drd), but a lot of data races
5 * (all false positives) are reported by the Eraser-algorithm (helgrind).
6 */
9 #define _GNU_SOURCE
11 /***********************/
12 /* Include directives. */
13 /***********************/
15 #include <assert.h>
16 #include <math.h>
17 #include <omp.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <unistd.h> // getopt()
23 /*********************/
24 /* Type definitions. */
25 /*********************/
27 typedef double elem_t;
30 /********************/
31 /* Local variables. */
32 /********************/
34 static int s_trigger_race;
37 /*************************/
38 /* Function definitions. */
39 /*************************/
41 /** Allocate memory for a matrix with the specified number of rows and
42 * columns.
44 static elem_t* new_matrix(const int rows, const int cols)
46 assert(rows > 0);
47 assert(cols > 0);
48 return malloc(rows * cols * sizeof(elem_t));
51 /** Free the memory that was allocated for a matrix. */
52 static void delete_matrix(elem_t* const a)
54 free(a);
57 /** Fill in some numbers in a matrix. */
58 static void init_matrix(elem_t* const a, const int rows, const int cols)
60 int i, j;
61 for (i = 0; i < rows; i++)
63 for (j = 0; j < rows; j++)
65 a[i * cols + j] = 1.0 / (1 + abs(i-j));
70 /** Print all elements of a matrix. */
71 void print_matrix(const char* const label,
72 const elem_t* const a, const int rows, const int cols)
74 int i, j;
75 printf("%s:\n", label);
76 for (i = 0; i < rows; i++)
78 for (j = 0; j < cols; j++)
80 printf("%g ", a[i * cols + j]);
82 printf("\n");
86 /** Copy a subset of the elements of a matrix into another matrix. */
87 static void copy_matrix(const elem_t* const from,
88 const int from_rows,
89 const int from_cols,
90 const int from_row_first,
91 const int from_row_last,
92 const int from_col_first,
93 const int from_col_last,
94 elem_t* const to,
95 const int to_rows,
96 const int to_cols,
97 const int to_row_first,
98 const int to_row_last,
99 const int to_col_first,
100 const int to_col_last)
102 int i, j;
104 assert(from_row_last - from_row_first == to_row_last - to_row_first);
105 assert(from_col_last - from_col_first == to_col_last - to_col_first);
107 for (i = from_row_first; i < from_row_last; i++)
109 assert(i < from_rows);
110 assert(i - from_row_first + to_row_first < to_rows);
111 for (j = from_col_first; j < from_col_last; j++)
113 assert(j < from_cols);
114 assert(j - from_col_first + to_col_first < to_cols);
115 to[(i - from_row_first + to_col_first) * to_cols
116 + (j - from_col_first + to_col_first)]
117 = from[i * from_cols + j];
122 /** Compute the matrix product of a1 and a2. */
123 static elem_t* multiply_matrices(const elem_t* const a1,
124 const int rows1,
125 const int cols1,
126 const elem_t* const a2,
127 const int rows2,
128 const int cols2)
130 int i, j, k;
131 elem_t* prod;
133 assert(cols1 == rows2);
135 prod = new_matrix(rows1, cols2);
136 for (i = 0; i < rows1; i++)
138 for (j = 0; j < cols2; j++)
140 prod[i * cols2 + j] = 0;
141 for (k = 0; k < cols1; k++)
143 prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
147 return prod;
150 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
151 * at row r0 and up to but not including row r1. It is assumed that as many
152 * threads execute this function concurrently as the count barrier p->b was
153 * initialized with. If the matrix p->a is nonsingular, and if matrix p->a
154 * has at least as many columns as rows, the result of this algorithm is that
155 * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
156 * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
158 static void gj(elem_t* const a, const int rows, const int cols)
160 int i, j, k;
162 for (i = 0; i < rows; i++)
165 // Pivoting.
166 j = i;
167 for (k = i + 1; k < rows; k++)
169 if (a[k * cols + i] > a[j * cols + i])
171 j = k;
174 if (j != i)
176 for (k = 0; k < cols; k++)
178 const elem_t t = a[i * cols + k];
179 a[i * cols + k] = a[j * cols + k];
180 a[j * cols + k] = t;
183 // Normalize row i.
184 if (a[i * cols + i] != 0)
186 for (k = cols - 1; k >= 0; k--)
188 a[i * cols + k] /= a[i * cols + i];
193 // Reduce all rows j != i.
195 if (s_trigger_race)
197 # pragma omp parallel for private(j)
198 for (j = 0; j < rows; j++)
200 if (i != j)
202 const elem_t factor = a[j * cols + i];
203 for (k = 0; k < cols; k++)
205 a[j * cols + k] -= a[i * cols + k] * factor;
210 else
212 # pragma omp parallel for private(j, k)
213 for (j = 0; j < rows; j++)
215 if (i != j)
217 const elem_t factor = a[j * cols + i];
218 for (k = 0; k < cols; k++)
220 a[j * cols + k] -= a[i * cols + k] * factor;
228 /** Matrix inversion via the Gauss-Jordan algorithm. */
229 static elem_t* invert_matrix(const elem_t* const a, const int n)
231 int i, j;
232 elem_t* const inv = new_matrix(n, n);
233 elem_t* const tmp = new_matrix(n, 2*n);
234 copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
235 for (i = 0; i < n; i++)
236 for (j = 0; j < n; j++)
237 tmp[i * 2 * n + n + j] = (i == j);
238 gj(tmp, n, 2*n);
239 copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
240 delete_matrix(tmp);
241 return inv;
244 /** Compute the average square error between the identity matrix and the
245 * product of matrix a with its inverse matrix.
247 static double identity_error(const elem_t* const a, const int n)
249 int i, j;
250 elem_t e = 0;
251 for (i = 0; i < n; i++)
253 for (j = 0; j < n; j++)
255 const elem_t d = a[i * n + j] - (i == j);
256 e += d * d;
259 return sqrt(e / (n * n));
262 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
263 * smallest number for which the sum of one and that number is different of
264 * one. It is assumed that the underlying representation of elem_t uses
265 * base two.
267 static elem_t epsilon()
269 elem_t eps;
270 for (eps = 1; 1 + eps != 1; eps /= 2)
272 return 2 * eps;
275 static void usage(const char* const exe)
277 printf("Usage: %s [-h] [-q] [-r] [-t<n>] <m>\n"
278 "-h: display this information.\n"
279 "-q: quiet mode -- do not print computed error.\n"
280 "-r: trigger a race condition.\n"
281 "-t<n>: use <n> threads.\n"
282 "<m>: matrix size.\n",
283 exe);
286 int main(int argc, char** argv)
288 int matrix_size;
289 int nthread = 1;
290 int silent = 0;
291 int optchar;
292 elem_t *a, *inv, *prod;
293 elem_t eps;
294 double error;
295 double ratio;
297 while ((optchar = getopt(argc, argv, "hqrt:")) != EOF)
299 switch (optchar)
301 case 'h': usage(argv[0]); return 1;
302 case 'q': silent = 1; break;
303 case 'r': s_trigger_race = 1; break;
304 case 't': nthread = atoi(optarg); break;
305 default:
306 return 1;
310 if (optind + 1 != argc)
312 fprintf(stderr, "Error: wrong number of arguments.\n");
313 return 1;
315 matrix_size = atoi(argv[optind]);
317 /* Error checking. */
318 assert(matrix_size >= 1);
319 assert(nthread >= 1);
321 omp_set_num_threads(nthread);
322 omp_set_dynamic(0);
324 eps = epsilon();
325 a = new_matrix(matrix_size, matrix_size);
326 init_matrix(a, matrix_size, matrix_size);
327 inv = invert_matrix(a, matrix_size);
328 prod = multiply_matrices(a, matrix_size, matrix_size,
329 inv, matrix_size, matrix_size);
330 error = identity_error(prod, matrix_size);
331 ratio = error / (eps * matrix_size);
332 if (! silent)
334 printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
335 error, eps, ratio);
337 if (isfinite(ratio) && ratio < 100)
338 printf("Error within bounds.\n");
339 else
340 printf("Error out of bounds.\n");
341 delete_matrix(prod);
342 delete_matrix(inv);
343 delete_matrix(a);
345 return 0;