1 /** Compute the matrix inverse via Gauss-Jordan elimination.
2 * This program uses only barriers to separate computation steps but no
3 * mutexes. It is an example of a race-free program on which no data races
4 * are reported by the happens-before algorithm (drd), but a lot of data races
5 * (all false positives) are reported by the Eraser-algorithm (helgrind).
11 /***********************/
12 /* Include directives. */
13 /***********************/
17 #include <limits.h> // PTHREAD_STACK_MIN
21 #include <unistd.h> // getopt()
24 /*********************/
25 /* Type definitions. */
26 /*********************/
28 typedef double elem_t
;
42 /********************/
43 /* Local variables. */
44 /********************/
46 static int s_nthread
= 1;
49 /*************************/
50 /* Function definitions. */
51 /*************************/
53 /** Allocate memory for a matrix with the specified number of rows and
56 static elem_t
* new_matrix(const int rows
, const int cols
)
60 return malloc(rows
* cols
* sizeof(elem_t
));
63 /** Free the memory that was allocated for a matrix. */
64 static void delete_matrix(elem_t
* const a
)
69 /** Fill in some numbers in a matrix. */
70 static void init_matrix(elem_t
* const a
, const int rows
, const int cols
)
73 for (i
= 0; i
< rows
; i
++)
75 for (j
= 0; j
< rows
; j
++)
77 a
[i
* cols
+ j
] = 1.0 / (1 + abs(i
-j
));
82 /** Print all elements of a matrix. */
83 void print_matrix(const char* const label
,
84 const elem_t
* const a
, const int rows
, const int cols
)
87 printf("%s:\n", label
);
88 for (i
= 0; i
< rows
; i
++)
90 for (j
= 0; j
< cols
; j
++)
92 printf("%g ", a
[i
* cols
+ j
]);
98 /** Copy a subset of the elements of a matrix into another matrix. */
99 static void copy_matrix(const elem_t
* const from
,
102 const int from_row_first
,
103 const int from_row_last
,
104 const int from_col_first
,
105 const int from_col_last
,
109 const int to_row_first
,
110 const int to_row_last
,
111 const int to_col_first
,
112 const int to_col_last
)
116 assert(from_row_last
- from_row_first
== to_row_last
- to_row_first
);
117 assert(from_col_last
- from_col_first
== to_col_last
- to_col_first
);
119 for (i
= from_row_first
; i
< from_row_last
; i
++)
121 assert(i
< from_rows
);
122 assert(i
- from_row_first
+ to_row_first
< to_rows
);
123 for (j
= from_col_first
; j
< from_col_last
; j
++)
125 assert(j
< from_cols
);
126 assert(j
- from_col_first
+ to_col_first
< to_cols
);
127 to
[(i
- from_row_first
+ to_col_first
) * to_cols
128 + (j
- from_col_first
+ to_col_first
)]
129 = from
[i
* from_cols
+ j
];
134 /** Compute the matrix product of a1 and a2. */
135 static elem_t
* multiply_matrices(const elem_t
* const a1
,
138 const elem_t
* const a2
,
145 assert(cols1
== rows2
);
147 prod
= new_matrix(rows1
, cols2
);
148 for (i
= 0; i
< rows1
; i
++)
150 for (j
= 0; j
< cols2
; j
++)
152 prod
[i
* cols2
+ j
] = 0;
153 for (k
= 0; k
< cols1
; k
++)
155 prod
[i
* cols2
+ j
] += a1
[i
* cols1
+ k
] * a2
[k
* cols2
+ j
];
162 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
163 * at row r0 and up to but not including row r1. It is assumed that as many
164 * threads execute this function concurrently as the count barrier p->b was
165 * initialized with. If the matrix p->a is nonsingular, and if matrix p->a
166 * has at least as many columns as rows, the result of this algorithm is that
167 * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
168 * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
170 static void gj_threadfunc(struct gj_threadinfo
* p
)
173 elem_t
* const a
= p
->a
;
174 const int rows
= p
->rows
;
175 const int cols
= p
->cols
;
178 for (i
= 0; i
< p
->rows
; i
++)
180 if (pthread_barrier_wait(p
->b
) == PTHREAD_BARRIER_SERIAL_THREAD
)
184 for (k
= i
+ 1; k
< rows
; k
++)
186 if (a
[k
* cols
+ i
] > a
[j
* cols
+ i
])
193 for (k
= 0; k
< cols
; k
++)
195 const elem_t t
= a
[i
* cols
+ k
];
196 a
[i
* cols
+ k
] = a
[j
* cols
+ k
];
201 aii
= a
[i
* cols
+ i
];
203 for (k
= i
; k
< cols
; k
++)
204 a
[i
* cols
+ k
] /= aii
;
206 pthread_barrier_wait(p
->b
);
207 // Reduce all rows j != i.
208 for (j
= p
->r0
; j
< p
->r1
; j
++)
212 const elem_t factor
= a
[j
* cols
+ i
];
213 for (k
= 0; k
< cols
; k
++)
215 a
[j
* cols
+ k
] -= a
[i
* cols
+ k
] * factor
;
222 /** Multithreaded Gauss-Jordan algorithm. */
223 static void gj(elem_t
* const a
, const int rows
, const int cols
)
226 struct gj_threadinfo
* t
;
231 assert(rows
<= cols
);
233 t
= malloc(sizeof(struct gj_threadinfo
) * s_nthread
);
235 pthread_barrier_init(&b
, 0, s_nthread
);
237 pthread_attr_init(&attr
);
238 err
= pthread_attr_setstacksize(&attr
, PTHREAD_STACK_MIN
+ 4096);
241 for (i
= 0; i
< s_nthread
; i
++)
247 t
[i
].r0
= i
* rows
/ s_nthread
;
248 t
[i
].r1
= (i
+1) * rows
/ s_nthread
;
249 pthread_create(&t
[i
].tid
, &attr
, (void*(*)(void*))gj_threadfunc
, &t
[i
]);
252 pthread_attr_destroy(&attr
);
254 for (i
= 0; i
< s_nthread
; i
++)
256 pthread_join(t
[i
].tid
, 0);
259 pthread_barrier_destroy(&b
);
264 /** Matrix inversion via the Gauss-Jordan algorithm. */
265 static elem_t
* invert_matrix(const elem_t
* const a
, const int n
)
268 elem_t
* const inv
= new_matrix(n
, n
);
269 elem_t
* const tmp
= new_matrix(n
, 2*n
);
270 copy_matrix(a
, n
, n
, 0, n
, 0, n
, tmp
, n
, 2 * n
, 0, n
, 0, n
);
271 for (i
= 0; i
< n
; i
++)
272 for (j
= 0; j
< n
; j
++)
273 tmp
[i
* 2 * n
+ n
+ j
] = (i
== j
);
275 copy_matrix(tmp
, n
, 2*n
, 0, n
, n
, 2*n
, inv
, n
, n
, 0, n
, 0, n
);
280 /** Compute the average square error between the identity matrix and the
281 * product of matrix a with its inverse matrix.
283 static double identity_error(const elem_t
* const a
, const int n
)
287 for (i
= 0; i
< n
; i
++)
289 for (j
= 0; j
< n
; j
++)
291 const elem_t d
= a
[i
* n
+ j
] - (i
== j
);
295 return sqrt(e
/ (n
* n
));
298 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
299 * smallest number for which the sum of one and that number is different of
300 * one. It is assumed that the underlying representation of elem_t uses
303 static elem_t
epsilon()
306 for (eps
= 1; 1 + eps
!= 1; eps
/= 2)
311 int main(int argc
, char** argv
)
316 elem_t
*a
, *inv
, *prod
;
321 while ((optchar
= getopt(argc
, argv
, "qt:")) != EOF
)
325 case 'q': silent
= 1; break;
326 case 't': s_nthread
= atoi(optarg
); break;
328 fprintf(stderr
, "Error: unknown option '%c'.\n", optchar
);
333 if (optind
+ 1 != argc
)
335 fprintf(stderr
, "Error: wrong number of arguments.\n");
338 matrix_size
= atoi(argv
[optind
]);
340 /* Error checking. */
341 assert(matrix_size
>= 1);
342 assert(s_nthread
>= 1);
345 a
= new_matrix(matrix_size
, matrix_size
);
346 init_matrix(a
, matrix_size
, matrix_size
);
347 inv
= invert_matrix(a
, matrix_size
);
348 prod
= multiply_matrices(a
, matrix_size
, matrix_size
,
349 inv
, matrix_size
, matrix_size
);
350 error
= identity_error(prod
, matrix_size
);
351 ratio
= error
/ (eps
* matrix_size
);
354 printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
357 if (isfinite(ratio
) && ratio
< 100)
358 printf("Error within bounds.\n");
360 printf("Error out of bounds.\n");