1 /** Compute the matrix inverse via Gauss-Jordan elimination.
2 * This program uses OpenMP separate computation steps but no
3 * mutexes. It is an example of a race-free program on which no data races
4 * are reported by the happens-before algorithm (drd), but a lot of data races
5 * (all false positives) are reported by the Eraser-algorithm (helgrind).
11 /***********************/
12 /* Include directives. */
13 /***********************/
20 #include <unistd.h> // getopt()
23 /*********************/
24 /* Type definitions. */
25 /*********************/
27 typedef double elem_t
;
30 /********************/
31 /* Local variables. */
32 /********************/
34 static int s_trigger_race
;
37 /*************************/
38 /* Function definitions. */
39 /*************************/
41 /** Allocate memory for a matrix with the specified number of rows and
44 static elem_t
* new_matrix(const int rows
, const int cols
)
48 return malloc(rows
* cols
* sizeof(elem_t
));
51 /** Free the memory that was allocated for a matrix. */
52 static void delete_matrix(elem_t
* const a
)
57 /** Fill in some numbers in a matrix. */
58 static void init_matrix(elem_t
* const a
, const int rows
, const int cols
)
61 for (i
= 0; i
< rows
; i
++)
63 for (j
= 0; j
< rows
; j
++)
65 a
[i
* cols
+ j
] = 1.0 / (1 + abs(i
-j
));
70 /** Print all elements of a matrix. */
71 void print_matrix(const char* const label
,
72 const elem_t
* const a
, const int rows
, const int cols
)
75 printf("%s:\n", label
);
76 for (i
= 0; i
< rows
; i
++)
78 for (j
= 0; j
< cols
; j
++)
80 printf("%g ", a
[i
* cols
+ j
]);
86 /** Copy a subset of the elements of a matrix into another matrix. */
87 static void copy_matrix(const elem_t
* const from
,
90 const int from_row_first
,
91 const int from_row_last
,
92 const int from_col_first
,
93 const int from_col_last
,
97 const int to_row_first
,
98 const int to_row_last
,
99 const int to_col_first
,
100 const int to_col_last
)
104 assert(from_row_last
- from_row_first
== to_row_last
- to_row_first
);
105 assert(from_col_last
- from_col_first
== to_col_last
- to_col_first
);
107 for (i
= from_row_first
; i
< from_row_last
; i
++)
109 assert(i
< from_rows
);
110 assert(i
- from_row_first
+ to_row_first
< to_rows
);
111 for (j
= from_col_first
; j
< from_col_last
; j
++)
113 assert(j
< from_cols
);
114 assert(j
- from_col_first
+ to_col_first
< to_cols
);
115 to
[(i
- from_row_first
+ to_col_first
) * to_cols
116 + (j
- from_col_first
+ to_col_first
)]
117 = from
[i
* from_cols
+ j
];
122 /** Compute the matrix product of a1 and a2. */
123 static elem_t
* multiply_matrices(const elem_t
* const a1
,
126 const elem_t
* const a2
,
133 assert(cols1
== rows2
);
135 prod
= new_matrix(rows1
, cols2
);
136 for (i
= 0; i
< rows1
; i
++)
138 for (j
= 0; j
< cols2
; j
++)
140 prod
[i
* cols2
+ j
] = 0;
141 for (k
= 0; k
< cols1
; k
++)
143 prod
[i
* cols2
+ j
] += a1
[i
* cols1
+ k
] * a2
[k
* cols2
+ j
];
150 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
151 * at row r0 and up to but not including row r1. It is assumed that as many
152 * threads execute this function concurrently as the count barrier p->b was
153 * initialized with. If the matrix p->a is nonsingular, and if matrix p->a
154 * has at least as many columns as rows, the result of this algorithm is that
155 * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
156 * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
158 static void gj(elem_t
* const a
, const int rows
, const int cols
)
162 for (i
= 0; i
< rows
; i
++)
167 for (k
= i
+ 1; k
< rows
; k
++)
169 if (a
[k
* cols
+ i
] > a
[j
* cols
+ i
])
176 for (k
= 0; k
< cols
; k
++)
178 const elem_t t
= a
[i
* cols
+ k
];
179 a
[i
* cols
+ k
] = a
[j
* cols
+ k
];
184 if (a
[i
* cols
+ i
] != 0)
186 for (k
= cols
- 1; k
>= 0; k
--)
188 a
[i
* cols
+ k
] /= a
[i
* cols
+ i
];
193 // Reduce all rows j != i.
197 # pragma omp parallel for private(j)
198 for (j
= 0; j
< rows
; j
++)
202 const elem_t factor
= a
[j
* cols
+ i
];
203 for (k
= 0; k
< cols
; k
++)
205 a
[j
* cols
+ k
] -= a
[i
* cols
+ k
] * factor
;
212 # pragma omp parallel for private(j, k)
213 for (j
= 0; j
< rows
; j
++)
217 const elem_t factor
= a
[j
* cols
+ i
];
218 for (k
= 0; k
< cols
; k
++)
220 a
[j
* cols
+ k
] -= a
[i
* cols
+ k
] * factor
;
228 /** Matrix inversion via the Gauss-Jordan algorithm. */
229 static elem_t
* invert_matrix(const elem_t
* const a
, const int n
)
232 elem_t
* const inv
= new_matrix(n
, n
);
233 elem_t
* const tmp
= new_matrix(n
, 2*n
);
234 copy_matrix(a
, n
, n
, 0, n
, 0, n
, tmp
, n
, 2 * n
, 0, n
, 0, n
);
235 for (i
= 0; i
< n
; i
++)
236 for (j
= 0; j
< n
; j
++)
237 tmp
[i
* 2 * n
+ n
+ j
] = (i
== j
);
239 copy_matrix(tmp
, n
, 2*n
, 0, n
, n
, 2*n
, inv
, n
, n
, 0, n
, 0, n
);
244 /** Compute the average square error between the identity matrix and the
245 * product of matrix a with its inverse matrix.
247 static double identity_error(const elem_t
* const a
, const int n
)
251 for (i
= 0; i
< n
; i
++)
253 for (j
= 0; j
< n
; j
++)
255 const elem_t d
= a
[i
* n
+ j
] - (i
== j
);
259 return sqrt(e
/ (n
* n
));
262 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
263 * smallest number for which the sum of one and that number is different of
264 * one. It is assumed that the underlying representation of elem_t uses
267 static elem_t
epsilon()
270 for (eps
= 1; 1 + eps
!= 1; eps
/= 2)
275 static void usage(const char* const exe
)
277 printf("Usage: %s [-h] [-q] [-r] [-t<n>] <m>\n"
278 "-h: display this information.\n"
279 "-q: quiet mode -- do not print computed error.\n"
280 "-r: trigger a race condition.\n"
281 "-t<n>: use <n> threads.\n"
282 "<m>: matrix size.\n",
286 int main(int argc
, char** argv
)
292 elem_t
*a
, *inv
, *prod
;
297 while ((optchar
= getopt(argc
, argv
, "hqrt:")) != EOF
)
301 case 'h': usage(argv
[0]); return 1;
302 case 'q': silent
= 1; break;
303 case 'r': s_trigger_race
= 1; break;
304 case 't': nthread
= atoi(optarg
); break;
310 if (optind
+ 1 != argc
)
312 fprintf(stderr
, "Error: wrong number of arguments.\n");
315 matrix_size
= atoi(argv
[optind
]);
317 /* Error checking. */
318 assert(matrix_size
>= 1);
319 assert(nthread
>= 1);
321 omp_set_num_threads(nthread
);
325 a
= new_matrix(matrix_size
, matrix_size
);
326 init_matrix(a
, matrix_size
, matrix_size
);
327 inv
= invert_matrix(a
, matrix_size
);
328 prod
= multiply_matrices(a
, matrix_size
, matrix_size
,
329 inv
, matrix_size
, matrix_size
);
330 error
= identity_error(prod
, matrix_size
);
331 ratio
= error
/ (eps
* matrix_size
);
334 printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
337 if (isfinite(ratio
) && ratio
< 100)
338 printf("Error within bounds.\n");
340 printf("Error out of bounds.\n");