1 /** Compute the matrix inverse via Gauss-Jordan elimination.
2 * This program uses only barriers to separate computation steps but no
3 * mutexes. It is an example of a race-free program on which no data races
4 * are reported by the happens-before algorithm (drd), but a lot of data races
5 * (all false positives) are reported by the Eraser-algorithm (helgrind).
11 /***********************/
12 /* Include directives. */
13 /***********************/
17 #include <limits.h> // PTHREAD_STACK_MIN
21 #include <unistd.h> // getopt()
24 /*********************/
25 /* Type definitions. */
26 /*********************/
28 typedef double elem_t
;
42 /********************/
43 /* Local variables. */
44 /********************/
46 static int s_nthread
= 1;
49 /*************************/
50 /* Function definitions. */
51 /*************************/
53 /** Allocate memory for a matrix with the specified number of rows and
56 static elem_t
* new_matrix(const int rows
, const int cols
)
60 return malloc(rows
* cols
* sizeof(elem_t
));
63 /** Free the memory that was allocated for a matrix. */
64 static void delete_matrix(elem_t
* const a
)
69 /** Fill in some numbers in a matrix. */
70 static void init_matrix(elem_t
* const a
, const int rows
, const int cols
)
73 for (i
= 0; i
< rows
; i
++)
75 for (j
= 0; j
< rows
; j
++)
77 a
[i
* cols
+ j
] = 1.0 / (1 + abs(i
-j
));
82 /** Print all elements of a matrix. */
83 void print_matrix(const char* const label
,
84 const elem_t
* const a
, const int rows
, const int cols
)
87 printf("%s:\n", label
);
88 for (i
= 0; i
< rows
; i
++)
90 for (j
= 0; j
< cols
; j
++)
92 printf("%g ", a
[i
* cols
+ j
]);
98 /** Copy a subset of the elements of a matrix into another matrix. */
99 static void copy_matrix(const elem_t
* const from
,
102 const int from_row_first
,
103 const int from_row_last
,
104 const int from_col_first
,
105 const int from_col_last
,
109 const int to_row_first
,
110 const int to_row_last
,
111 const int to_col_first
,
112 const int to_col_last
)
116 assert(from_row_last
- from_row_first
== to_row_last
- to_row_first
);
117 assert(from_col_last
- from_col_first
== to_col_last
- to_col_first
);
119 for (i
= from_row_first
; i
< from_row_last
; i
++)
121 assert(i
< from_rows
);
122 assert(i
- from_row_first
+ to_row_first
< to_rows
);
123 for (j
= from_col_first
; j
< from_col_last
; j
++)
125 assert(j
< from_cols
);
126 assert(j
- from_col_first
+ to_col_first
< to_cols
);
127 to
[(i
- from_row_first
+ to_col_first
) * to_cols
128 + (j
- from_col_first
+ to_col_first
)]
129 = from
[i
* from_cols
+ j
];
134 /** Compute the matrix product of a1 and a2. */
135 static elem_t
* multiply_matrices(const elem_t
* const a1
,
138 const elem_t
* const a2
,
145 assert(cols1
== rows2
);
147 prod
= new_matrix(rows1
, cols2
);
148 for (i
= 0; i
< rows1
; i
++)
150 for (j
= 0; j
< cols2
; j
++)
152 prod
[i
* cols2
+ j
] = 0;
153 for (k
= 0; k
< cols1
; k
++)
155 prod
[i
* cols2
+ j
] += a1
[i
* cols1
+ k
] * a2
[k
* cols2
+ j
];
162 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
163 * at row r0 and up to but not including row r1. It is assumed that as many
164 * threads execute this function concurrently as the count barrier p->b was
165 * initialized with. If the matrix p->a is nonsingular, and if matrix p->a
166 * has at least as many columns as rows, the result of this algorithm is that
167 * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
168 * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
170 static void *gj_threadfunc(void *arg
)
172 struct gj_threadinfo
* p
= arg
;
174 elem_t
* const a
= p
->a
;
175 const int rows
= p
->rows
;
176 const int cols
= p
->cols
;
179 for (i
= 0; i
< p
->rows
; i
++)
181 if (pthread_barrier_wait(p
->b
) == PTHREAD_BARRIER_SERIAL_THREAD
)
185 for (k
= i
+ 1; k
< rows
; k
++)
187 if (a
[k
* cols
+ i
] > a
[j
* cols
+ i
])
194 for (k
= 0; k
< cols
; k
++)
196 const elem_t t
= a
[i
* cols
+ k
];
197 a
[i
* cols
+ k
] = a
[j
* cols
+ k
];
202 aii
= a
[i
* cols
+ i
];
204 for (k
= i
; k
< cols
; k
++)
205 a
[i
* cols
+ k
] /= aii
;
207 pthread_barrier_wait(p
->b
);
208 // Reduce all rows j != i.
209 for (j
= p
->r0
; j
< p
->r1
; j
++)
213 const elem_t factor
= a
[j
* cols
+ i
];
214 for (k
= 0; k
< cols
; k
++)
216 a
[j
* cols
+ k
] -= a
[i
* cols
+ k
] * factor
;
224 /** Multithreaded Gauss-Jordan algorithm. */
225 static void gj(elem_t
* const a
, const int rows
, const int cols
)
228 struct gj_threadinfo
* t
;
233 assert(rows
<= cols
);
235 t
= malloc(sizeof(struct gj_threadinfo
) * s_nthread
);
237 pthread_barrier_init(&b
, 0, s_nthread
);
239 pthread_attr_init(&attr
);
240 err
= pthread_attr_setstacksize(&attr
, PTHREAD_STACK_MIN
+ 4096);
243 for (i
= 0; i
< s_nthread
; i
++)
249 t
[i
].r0
= i
* rows
/ s_nthread
;
250 t
[i
].r1
= (i
+1) * rows
/ s_nthread
;
251 pthread_create(&t
[i
].tid
, &attr
, gj_threadfunc
, &t
[i
]);
254 pthread_attr_destroy(&attr
);
256 for (i
= 0; i
< s_nthread
; i
++)
258 pthread_join(t
[i
].tid
, 0);
261 pthread_barrier_destroy(&b
);
266 /** Matrix inversion via the Gauss-Jordan algorithm. */
267 static elem_t
* invert_matrix(const elem_t
* const a
, const int n
)
270 elem_t
* const inv
= new_matrix(n
, n
);
271 elem_t
* const tmp
= new_matrix(n
, 2*n
);
272 copy_matrix(a
, n
, n
, 0, n
, 0, n
, tmp
, n
, 2 * n
, 0, n
, 0, n
);
273 for (i
= 0; i
< n
; i
++)
274 for (j
= 0; j
< n
; j
++)
275 tmp
[i
* 2 * n
+ n
+ j
] = (i
== j
);
277 copy_matrix(tmp
, n
, 2*n
, 0, n
, n
, 2*n
, inv
, n
, n
, 0, n
, 0, n
);
282 /** Compute the average square error between the identity matrix and the
283 * product of matrix a with its inverse matrix.
285 static double identity_error(const elem_t
* const a
, const int n
)
289 for (i
= 0; i
< n
; i
++)
291 for (j
= 0; j
< n
; j
++)
293 const elem_t d
= a
[i
* n
+ j
] - (i
== j
);
297 return sqrt(e
/ (n
* n
));
300 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
301 * smallest number for which the sum of one and that number is different of
302 * one. It is assumed that the underlying representation of elem_t uses
305 static elem_t
epsilon()
308 for (eps
= 1; 1 + eps
!= 1; eps
/= 2)
313 int main(int argc
, char** argv
)
318 elem_t
*a
, *inv
, *prod
;
323 while ((optchar
= getopt(argc
, argv
, "qt:")) != EOF
)
327 case 'q': silent
= 1; break;
328 case 't': s_nthread
= atoi(optarg
); break;
330 fprintf(stderr
, "Error: unknown option '%c'.\n", optchar
);
335 if (optind
+ 1 != argc
)
337 fprintf(stderr
, "Error: wrong number of arguments.\n");
340 matrix_size
= atoi(argv
[optind
]);
342 /* Error checking. */
343 assert(matrix_size
>= 1);
344 assert(s_nthread
>= 1);
347 a
= new_matrix(matrix_size
, matrix_size
);
348 init_matrix(a
, matrix_size
, matrix_size
);
349 inv
= invert_matrix(a
, matrix_size
);
350 prod
= multiply_matrices(a
, matrix_size
, matrix_size
,
351 inv
, matrix_size
, matrix_size
);
352 error
= identity_error(prod
, matrix_size
);
353 ratio
= error
/ (eps
* matrix_size
);
356 printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
359 if (isfinite(ratio
) && ratio
< 100)
360 printf("Error within bounds.\n");
362 printf("Error out of bounds.\n");