drd/tests/swapcontext: Improve the portability of this test further
[valgrind.git] / drd / tests / matinv.c
blob1369a699f48b1c4d7efa4a813cb049efbbfd7740
1 /** Compute the matrix inverse via Gauss-Jordan elimination.
2 * This program uses only barriers to separate computation steps but no
3 * mutexes. It is an example of a race-free program on which no data races
4 * are reported by the happens-before algorithm (drd), but a lot of data races
5 * (all false positives) are reported by the Eraser-algorithm (helgrind).
6 */
9 #define _GNU_SOURCE
11 /***********************/
12 /* Include directives. */
13 /***********************/
15 #include <assert.h>
16 #include <math.h>
17 #include <limits.h> // PTHREAD_STACK_MIN
18 #include <pthread.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h> // getopt()
24 /*********************/
25 /* Type definitions. */
26 /*********************/
28 typedef double elem_t;
30 struct gj_threadinfo
32 pthread_barrier_t* b;
33 pthread_t tid;
34 elem_t* a;
35 int rows;
36 int cols;
37 int r0;
38 int r1;
42 /********************/
43 /* Local variables. */
44 /********************/
46 static int s_nthread = 1;
49 /*************************/
50 /* Function definitions. */
51 /*************************/
53 /** Allocate memory for a matrix with the specified number of rows and
54 * columns.
56 static elem_t* new_matrix(const int rows, const int cols)
58 assert(rows > 0);
59 assert(cols > 0);
60 return malloc(rows * cols * sizeof(elem_t));
63 /** Free the memory that was allocated for a matrix. */
64 static void delete_matrix(elem_t* const a)
66 free(a);
69 /** Fill in some numbers in a matrix. */
70 static void init_matrix(elem_t* const a, const int rows, const int cols)
72 int i, j;
73 for (i = 0; i < rows; i++)
75 for (j = 0; j < rows; j++)
77 a[i * cols + j] = 1.0 / (1 + abs(i-j));
82 /** Print all elements of a matrix. */
83 void print_matrix(const char* const label,
84 const elem_t* const a, const int rows, const int cols)
86 int i, j;
87 printf("%s:\n", label);
88 for (i = 0; i < rows; i++)
90 for (j = 0; j < cols; j++)
92 printf("%g ", a[i * cols + j]);
94 printf("\n");
98 /** Copy a subset of the elements of a matrix into another matrix. */
99 static void copy_matrix(const elem_t* const from,
100 const int from_rows,
101 const int from_cols,
102 const int from_row_first,
103 const int from_row_last,
104 const int from_col_first,
105 const int from_col_last,
106 elem_t* const to,
107 const int to_rows,
108 const int to_cols,
109 const int to_row_first,
110 const int to_row_last,
111 const int to_col_first,
112 const int to_col_last)
114 int i, j;
116 assert(from_row_last - from_row_first == to_row_last - to_row_first);
117 assert(from_col_last - from_col_first == to_col_last - to_col_first);
119 for (i = from_row_first; i < from_row_last; i++)
121 assert(i < from_rows);
122 assert(i - from_row_first + to_row_first < to_rows);
123 for (j = from_col_first; j < from_col_last; j++)
125 assert(j < from_cols);
126 assert(j - from_col_first + to_col_first < to_cols);
127 to[(i - from_row_first + to_col_first) * to_cols
128 + (j - from_col_first + to_col_first)]
129 = from[i * from_cols + j];
134 /** Compute the matrix product of a1 and a2. */
135 static elem_t* multiply_matrices(const elem_t* const a1,
136 const int rows1,
137 const int cols1,
138 const elem_t* const a2,
139 const int rows2,
140 const int cols2)
142 int i, j, k;
143 elem_t* prod;
145 assert(cols1 == rows2);
147 prod = new_matrix(rows1, cols2);
148 for (i = 0; i < rows1; i++)
150 for (j = 0; j < cols2; j++)
152 prod[i * cols2 + j] = 0;
153 for (k = 0; k < cols1; k++)
155 prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
159 return prod;
162 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
163 * at row r0 and up to but not including row r1. It is assumed that as many
164 * threads execute this function concurrently as the count barrier p->b was
165 * initialized with. If the matrix p->a is nonsingular, and if matrix p->a
166 * has at least as many columns as rows, the result of this algorithm is that
167 * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
168 * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
170 static void *gj_threadfunc(void *arg)
172 struct gj_threadinfo* p = arg;
173 int i, j, k;
174 elem_t* const a = p->a;
175 const int rows = p->rows;
176 const int cols = p->cols;
177 elem_t aii;
179 for (i = 0; i < p->rows; i++)
181 if (pthread_barrier_wait(p->b) == PTHREAD_BARRIER_SERIAL_THREAD)
183 // Pivoting.
184 j = i;
185 for (k = i + 1; k < rows; k++)
187 if (a[k * cols + i] > a[j * cols + i])
189 j = k;
192 if (j != i)
194 for (k = 0; k < cols; k++)
196 const elem_t t = a[i * cols + k];
197 a[i * cols + k] = a[j * cols + k];
198 a[j * cols + k] = t;
201 // Normalize row i.
202 aii = a[i * cols + i];
203 if (aii != 0)
204 for (k = i; k < cols; k++)
205 a[i * cols + k] /= aii;
207 pthread_barrier_wait(p->b);
208 // Reduce all rows j != i.
209 for (j = p->r0; j < p->r1; j++)
211 if (i != j)
213 const elem_t factor = a[j * cols + i];
214 for (k = 0; k < cols; k++)
216 a[j * cols + k] -= a[i * cols + k] * factor;
221 return NULL;
224 /** Multithreaded Gauss-Jordan algorithm. */
225 static void gj(elem_t* const a, const int rows, const int cols)
227 int i;
228 struct gj_threadinfo* t;
229 pthread_barrier_t b;
230 pthread_attr_t attr;
231 int err;
233 assert(rows <= cols);
235 t = malloc(sizeof(struct gj_threadinfo) * s_nthread);
237 pthread_barrier_init(&b, 0, s_nthread);
239 pthread_attr_init(&attr);
240 err = pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN + 4096);
241 assert(err == 0);
243 for (i = 0; i < s_nthread; i++)
245 t[i].b = &b;
246 t[i].a = a;
247 t[i].rows = rows;
248 t[i].cols = cols;
249 t[i].r0 = i * rows / s_nthread;
250 t[i].r1 = (i+1) * rows / s_nthread;
251 pthread_create(&t[i].tid, &attr, gj_threadfunc, &t[i]);
254 pthread_attr_destroy(&attr);
256 for (i = 0; i < s_nthread; i++)
258 pthread_join(t[i].tid, 0);
261 pthread_barrier_destroy(&b);
263 free(t);
266 /** Matrix inversion via the Gauss-Jordan algorithm. */
267 static elem_t* invert_matrix(const elem_t* const a, const int n)
269 int i, j;
270 elem_t* const inv = new_matrix(n, n);
271 elem_t* const tmp = new_matrix(n, 2*n);
272 copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
273 for (i = 0; i < n; i++)
274 for (j = 0; j < n; j++)
275 tmp[i * 2 * n + n + j] = (i == j);
276 gj(tmp, n, 2*n);
277 copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
278 delete_matrix(tmp);
279 return inv;
282 /** Compute the average square error between the identity matrix and the
283 * product of matrix a with its inverse matrix.
285 static double identity_error(const elem_t* const a, const int n)
287 int i, j;
288 elem_t e = 0;
289 for (i = 0; i < n; i++)
291 for (j = 0; j < n; j++)
293 const elem_t d = a[i * n + j] - (i == j);
294 e += d * d;
297 return sqrt(e / (n * n));
300 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
301 * smallest number for which the sum of one and that number is different of
302 * one. It is assumed that the underlying representation of elem_t uses
303 * base two.
305 static elem_t epsilon()
307 elem_t eps;
308 for (eps = 1; 1 + eps != 1; eps /= 2)
310 return 2 * eps;
313 int main(int argc, char** argv)
315 int matrix_size;
316 int silent = 0;
317 int optchar;
318 elem_t *a, *inv, *prod;
319 elem_t eps;
320 double error;
321 double ratio;
323 while ((optchar = getopt(argc, argv, "qt:")) != EOF)
325 switch (optchar)
327 case 'q': silent = 1; break;
328 case 't': s_nthread = atoi(optarg); break;
329 default:
330 fprintf(stderr, "Error: unknown option '%c'.\n", optchar);
331 return 1;
335 if (optind + 1 != argc)
337 fprintf(stderr, "Error: wrong number of arguments.\n");
338 return 1;
340 matrix_size = atoi(argv[optind]);
342 /* Error checking. */
343 assert(matrix_size >= 1);
344 assert(s_nthread >= 1);
346 eps = epsilon();
347 a = new_matrix(matrix_size, matrix_size);
348 init_matrix(a, matrix_size, matrix_size);
349 inv = invert_matrix(a, matrix_size);
350 prod = multiply_matrices(a, matrix_size, matrix_size,
351 inv, matrix_size, matrix_size);
352 error = identity_error(prod, matrix_size);
353 ratio = error / (eps * matrix_size);
354 if (! silent)
356 printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
357 error, eps, ratio);
359 if (isfinite(ratio) && ratio < 100)
360 printf("Error within bounds.\n");
361 else
362 printf("Error out of bounds.\n");
363 delete_matrix(prod);
364 delete_matrix(inv);
365 delete_matrix(a);
367 return 0;