3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
19 package org
.apache
.hadoop
.hbase
.util
;
21 import java
.util
.Arrays
;
22 import java
.util
.Deque
;
23 import java
.util
.LinkedList
;
25 import org
.apache
.hadoop
.hbase
.classification
.InterfaceAudience
;
28 * Computes the optimal (minimal cost) assignment of jobs to workers (or other
29 * analogous) concepts given a cost matrix of each pair of job and worker, using
30 * the algorithm by James Munkres in "Algorithms for the Assignment and
31 * Transportation Problems", with additional optimizations as described by Jin
32 * Kue Wong in "A New Implementation of an Algorithm for the Optimal Assignment
33 * Problem: An Improved Version of Munkres' Algorithm". The algorithm runs in
34 * O(n^3) time and need O(n^2) auxiliary space where n is the number of jobs or
35 * workers, whichever is greater.
37 @InterfaceAudience.Private
38 public class MunkresAssignment
{
40 // The original algorithm by Munkres uses the terms STAR and PRIME to denote
41 // different states of zero values in the cost matrix. These values are
42 // represented as byte constants instead of enums to save space in the mask
43 // matrix by a factor of 4n^2 where n is the size of the problem.
44 private static final byte NONE
= 0;
45 private static final byte STAR
= 1;
46 private static final byte PRIME
= 2;
48 // The algorithm requires that the number of column is at least as great as
49 // the number of rows. If that is not the case, then the cost matrix should
50 // be transposed before computation, and the solution matrix transposed before
51 // returning to the caller.
52 private final boolean transposed
;
54 // The number of rows of internal matrices.
55 private final int rows
;
57 // The number of columns of internal matrices.
58 private final int cols
;
60 // The cost matrix, the cost of assigning each row index to column index.
61 private float[][] cost
;
63 // Mask of zero cost assignment states.
64 private byte[][] mask
;
66 // Covering some rows of the cost matrix.
67 private boolean[] rowsCovered
;
69 // Covering some columns of the cost matrix.
70 private boolean[] colsCovered
;
72 // The alternating path between starred zeroes and primed zeroes
73 private Deque
<Pair
<Integer
, Integer
>> path
;
75 // The solution, marking which rows should be assigned to which columns. The
76 // positions of elements in this array correspond to the rows of the cost
77 // matrix, and the value of each element correspond to the columns of the cost
78 // matrix, i.e. assignments[i] = j indicates that row i should be assigned to
80 private int[] assignments
;
82 // Improvements described by Jin Kue Wong cache the least value in each row,
83 // as well as the column index of the least value in each row, and the pending
84 // adjustments to each row and each column.
85 private float[] leastInRow
;
86 private int[] leastInRowIndex
;
87 private float[] rowAdjust
;
88 private float[] colAdjust
;
91 * Construct a new problem instance with the specified cost matrix. The cost
92 * matrix must be rectangular, though not necessarily square. If one dimension
93 * is greater than the other, some elements in the greater dimension will not
94 * be assigned. The input cost matrix will not be modified.
97 public MunkresAssignment(float[][] costMatrix
) {
98 // The algorithm assumes that the number of columns is at least as great as
99 // the number of rows. If this is not the case of the input matrix, then
100 // all internal structures must be transposed relative to the input.
101 this.transposed
= costMatrix
.length
> costMatrix
[0].length
;
102 if (this.transposed
) {
103 this.rows
= costMatrix
[0].length
;
104 this.cols
= costMatrix
.length
;
106 this.rows
= costMatrix
.length
;
107 this.cols
= costMatrix
[0].length
;
110 cost
= new float[rows
][cols
];
111 mask
= new byte[rows
][cols
];
112 rowsCovered
= new boolean[rows
];
113 colsCovered
= new boolean[cols
];
114 path
= new LinkedList
<>();
116 leastInRow
= new float[rows
];
117 leastInRowIndex
= new int[rows
];
118 rowAdjust
= new float[rows
];
119 colAdjust
= new float[cols
];
125 for (int r
= 0; r
< rows
; r
++) {
126 for (int c
= 0; c
< cols
; c
++) {
127 cost
[r
][c
] = costMatrix
[c
][r
];
131 for (int r
= 0; r
< rows
; r
++) {
132 System
.arraycopy(costMatrix
[r
], 0, cost
[r
], 0, cols
);
136 // Costs must be finite otherwise the matrix can get into a bad state where
137 // no progress can be made. If your use case depends on a distinction
138 // between costs of MAX_VALUE and POSITIVE_INFINITY, you're doing it wrong.
139 for (int r
= 0; r
< rows
; r
++) {
140 for (int c
= 0; c
< cols
; c
++) {
141 if (cost
[r
][c
] == Float
.POSITIVE_INFINITY
) {
142 cost
[r
][c
] = Float
.MAX_VALUE
;
149 * Get the optimal assignments. The returned array will have the same number
150 * of elements as the number of elements as the number of rows in the input
151 * cost matrix. Each element will indicate which column should be assigned to
152 * that row or -1 if no column should be assigned, i.e. if result[i] = j then
153 * row i should be assigned to column j. Subsequent invocations of this method
154 * will simply return the same object without additional computation.
155 * @return an array with the optimal assignments
157 public int[] solve() {
158 // If this assignment problem has already been solved, return the known
160 if (assignments
!= null) {
166 // Find the optimal assignments.
167 while (!testIsDone()) {
174 // Extract the assignments from the mask matrix.
176 assignments
= new int[cols
];
178 for (int c
= 0; c
< cols
; c
++) {
179 for (int r
= 0; r
< rows
; r
++) {
180 if (mask
[r
][c
] == STAR
) {
185 // There is no assignment for this row of the input/output.
189 assignments
= new int[rows
];
191 for (int r
= 0; r
< rows
; r
++) {
192 for (int c
= 0; c
< cols
; c
++) {
193 if (mask
[r
][c
] == STAR
) {
201 // Once the solution has been computed, there is no need to keep any of the
202 // other internal structures. Clear all unnecessary internal references so
203 // the garbage collector may reclaim that memory.
210 leastInRowIndex
= null;
218 * Corresponds to the "preliminaries" step of the original algorithm.
219 * Guarantees that the matrix is an equivalent non-negative matrix with at
220 * least one zero in each row.
222 private void preliminaries() {
223 for (int r
= 0; r
< rows
; r
++) {
224 // Find the minimum cost of each row.
225 float min
= Float
.POSITIVE_INFINITY
;
226 for (int c
= 0; c
< cols
; c
++) {
227 min
= Math
.min(min
, cost
[r
][c
]);
230 // Subtract that minimum cost from each element in the row.
231 for (int c
= 0; c
< cols
; c
++) {
234 // If the element is now zero and there are no zeroes in the same row
235 // or column which are already starred, then star this one. There
236 // must be at least one zero because of subtracting the min cost.
237 if (cost
[r
][c
] == 0 && !rowsCovered
[r
] && !colsCovered
[c
]) {
239 // Cover this row and column so that no other zeroes in them can be
241 rowsCovered
[r
] = true;
242 colsCovered
[c
] = true;
247 // Clear the covered rows and columns.
248 Arrays
.fill(rowsCovered
, false);
249 Arrays
.fill(colsCovered
, false);
253 * Test whether the algorithm is done, i.e. we have the optimal assignment.
254 * This occurs when there is exactly one starred zero in each row.
255 * @return true if the algorithm is done
257 private boolean testIsDone() {
258 // Cover all columns containing a starred zero. There can be at most one
259 // starred zero per column. Therefore, a covered column has an optimal
261 for (int r
= 0; r
< rows
; r
++) {
262 for (int c
= 0; c
< cols
; c
++) {
263 if (mask
[r
][c
] == STAR
) {
264 colsCovered
[c
] = true;
269 // Count the total number of covered columns.
271 for (int c
= 0; c
< cols
; c
++) {
272 coveredCols
+= colsCovered
[c
] ?
1 : 0;
275 // Apply an row and column adjustments that are pending.
276 for (int r
= 0; r
< rows
; r
++) {
277 for (int c
= 0; c
< cols
; c
++) {
278 cost
[r
][c
] += rowAdjust
[r
];
279 cost
[r
][c
] += colAdjust
[c
];
283 // Clear the pending row and column adjustments.
284 Arrays
.fill(rowAdjust
, 0);
285 Arrays
.fill(colAdjust
, 0);
287 // The covers on columns and rows may have been reset, recompute the least
288 // value for each row.
289 for (int r
= 0; r
< rows
; r
++) {
290 leastInRow
[r
] = Float
.POSITIVE_INFINITY
;
291 for (int c
= 0; c
< cols
; c
++) {
292 if (!rowsCovered
[r
] && !colsCovered
[c
] && cost
[r
][c
] < leastInRow
[r
]) {
293 leastInRow
[r
] = cost
[r
][c
];
294 leastInRowIndex
[r
] = c
;
299 // If all columns are covered, then we are done. Since there may be more
300 // columns than rows, we are also done if the number of covered columns is
301 // at least as great as the number of rows.
302 return (coveredCols
== cols
|| coveredCols
>= rows
);
306 * Corresponds to step 1 of the original algorithm.
307 * @return false if all zeroes are covered
309 private boolean stepOne() {
311 Pair
<Integer
, Integer
> zero
= findUncoveredZero();
313 // No uncovered zeroes, need to manipulate the cost matrix in step
317 // Prime the uncovered zero and find a starred zero in the same row.
318 mask
[zero
.getFirst()][zero
.getSecond()] = PRIME
;
319 Pair
<Integer
, Integer
> star
= starInRow(zero
.getFirst());
321 // Cover the row with both the newly primed zero and the starred zero.
322 // Since this is the only place where zeroes are primed, and we cover
323 // it here, and rows are only uncovered when primes are erased, then
324 // there can be at most one primed uncovered zero.
325 rowsCovered
[star
.getFirst()] = true;
326 colsCovered
[star
.getSecond()] = false;
327 updateMin(star
.getFirst(), star
.getSecond());
329 // Will go to step two after, where a path will be constructed,
330 // starting from the uncovered primed zero (there is only one). Since
331 // we have already found it, save it as the first node in the path.
333 path
.offerLast(new Pair
<>(zero
.getFirst(), zero
.getSecond()));
341 * Corresponds to step 2 of the original algorithm.
343 private void stepTwo() {
344 // Construct a path of alternating starred zeroes and primed zeroes, where
345 // each starred zero is in the same column as the previous primed zero, and
346 // each primed zero is in the same row as the previous starred zero. The
347 // path will always end in a primed zero.
349 Pair
<Integer
, Integer
> star
= starInCol(path
.getLast().getSecond());
351 path
.offerLast(star
);
355 Pair
<Integer
, Integer
> prime
= primeInRow(path
.getLast().getFirst());
356 path
.offerLast(prime
);
359 // Augment path - unmask all starred zeroes and star all primed zeroes. All
360 // nodes in the path will be either starred or primed zeroes. The set of
361 // starred zeroes is independent and now one larger than before.
362 for (Pair
<Integer
, Integer
> p
: path
) {
363 if (mask
[p
.getFirst()][p
.getSecond()] == STAR
) {
364 mask
[p
.getFirst()][p
.getSecond()] = NONE
;
366 mask
[p
.getFirst()][p
.getSecond()] = STAR
;
370 // Clear all covers from rows and columns.
371 Arrays
.fill(rowsCovered
, false);
372 Arrays
.fill(colsCovered
, false);
374 // Remove the prime mask from all primed zeroes.
375 for (int r
= 0; r
< rows
; r
++) {
376 for (int c
= 0; c
< cols
; c
++) {
377 if (mask
[r
][c
] == PRIME
) {
385 * Corresponds to step 3 of the original algorithm.
387 private void stepThree() {
388 // Find the minimum uncovered cost.
389 float min
= leastInRow
[0];
390 for (int r
= 1; r
< rows
; r
++) {
391 if (leastInRow
[r
] < min
) {
396 // Add the minimum cost to each of the costs in a covered row, or subtract
397 // the minimum cost from each of the costs in an uncovered column. As an
398 // optimization, do not actually modify the cost matrix yet, but track the
399 // adjustments that need to be made to each row and column.
400 for (int r
= 0; r
< rows
; r
++) {
401 if (rowsCovered
[r
]) {
405 for (int c
= 0; c
< cols
; c
++) {
406 if (!colsCovered
[c
]) {
411 // Since the cost matrix is not being updated yet, the minimum uncovered
412 // cost per row must be updated.
413 for (int r
= 0; r
< rows
; r
++) {
414 if (!colsCovered
[leastInRowIndex
[r
]]) {
415 // The least value in this row was in an uncovered column, meaning that
416 // it would have had the minimum value subtracted from it, and therefore
417 // will still be the minimum value in that row.
418 leastInRow
[r
] -= min
;
420 // The least value in this row was in a covered column and would not
421 // have had the minimum value subtracted from it, so the minimum value
422 // could be some in another column.
423 for (int c
= 0; c
< cols
; c
++) {
424 if (cost
[r
][c
] + colAdjust
[c
] + rowAdjust
[r
] < leastInRow
[r
]) {
425 leastInRow
[r
] = cost
[r
][c
] + colAdjust
[c
] + rowAdjust
[r
];
426 leastInRowIndex
[r
] = c
;
434 * Find a zero cost assignment which is not covered. If there are no zero cost
435 * assignments which are uncovered, then null will be returned.
436 * @return pair of row and column indices of an uncovered zero or null
438 private Pair
<Integer
, Integer
> findUncoveredZero() {
439 for (int r
= 0; r
< rows
; r
++) {
440 if (leastInRow
[r
] == 0) {
441 return new Pair
<>(r
, leastInRowIndex
[r
]);
448 * A specified row has become covered, and a specified column has become
449 * uncovered. The least value per row may need to be updated.
450 * @param row the index of the row which was just covered
451 * @param col the index of the column which was just uncovered
453 private void updateMin(int row
, int col
) {
454 // If the row is covered we want to ignore it as far as least values go.
455 leastInRow
[row
] = Float
.POSITIVE_INFINITY
;
457 for (int r
= 0; r
< rows
; r
++) {
458 // Since the column has only just been uncovered, it could not have any
459 // pending adjustments. Only covered rows can have pending adjustments
460 // and covered costs do not count toward row minimums. Therefore, we do
461 // not need to consider rowAdjust[r] or colAdjust[col].
462 if (!rowsCovered
[r
] && cost
[r
][col
] < leastInRow
[r
]) {
463 leastInRow
[r
] = cost
[r
][col
];
464 leastInRowIndex
[r
] = col
;
470 * Find a starred zero in a specified row. If there are no starred zeroes in
471 * the specified row, then null will be returned.
472 * @param r the index of the row to be searched
473 * @return pair of row and column indices of starred zero or null
475 private Pair
<Integer
, Integer
> starInRow(int r
) {
476 for (int c
= 0; c
< cols
; c
++) {
477 if (mask
[r
][c
] == STAR
) {
478 return new Pair
<>(r
, c
);
485 * Find a starred zero in the specified column. If there are no starred zeroes
486 * in the specified row, then null will be returned.
487 * @param c the index of the column to be searched
488 * @return pair of row and column indices of starred zero or null
490 private Pair
<Integer
, Integer
> starInCol(int c
) {
491 for (int r
= 0; r
< rows
; r
++) {
492 if (mask
[r
][c
] == STAR
) {
493 return new Pair
<>(r
, c
);
500 * Find a primed zero in the specified row. If there are no primed zeroes in
501 * the specified row, then null will be returned.
502 * @param r the index of the row to be searched
503 * @return pair of row and column indices of primed zero or null
505 private Pair
<Integer
, Integer
> primeInRow(int r
) {
506 for (int c
= 0; c
< cols
; c
++) {
507 if (mask
[r
][c
] == PRIME
) {
508 return new Pair
<>(r
, c
);