HBASE-17532 Replaced explicit type with diamond operator
[hbase.git] / hbase-server / src / main / java / org / apache / hadoop / hbase / util / MunkresAssignment.java
blob47217817acfb5d7c185dd776584ea8e3185ffa74
1 /*
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
19 package org.apache.hadoop.hbase.util;
21 import java.util.Arrays;
22 import java.util.Deque;
23 import java.util.LinkedList;
25 import org.apache.hadoop.hbase.classification.InterfaceAudience;
27 /**
28 * Computes the optimal (minimal cost) assignment of jobs to workers (or other
29 * analogous) concepts given a cost matrix of each pair of job and worker, using
30 * the algorithm by James Munkres in "Algorithms for the Assignment and
31 * Transportation Problems", with additional optimizations as described by Jin
32 * Kue Wong in "A New Implementation of an Algorithm for the Optimal Assignment
33 * Problem: An Improved Version of Munkres' Algorithm". The algorithm runs in
34 * O(n^3) time and need O(n^2) auxiliary space where n is the number of jobs or
35 * workers, whichever is greater.
37 @InterfaceAudience.Private
38 public class MunkresAssignment {
40 // The original algorithm by Munkres uses the terms STAR and PRIME to denote
41 // different states of zero values in the cost matrix. These values are
42 // represented as byte constants instead of enums to save space in the mask
43 // matrix by a factor of 4n^2 where n is the size of the problem.
44 private static final byte NONE = 0;
45 private static final byte STAR = 1;
46 private static final byte PRIME = 2;
48 // The algorithm requires that the number of column is at least as great as
49 // the number of rows. If that is not the case, then the cost matrix should
50 // be transposed before computation, and the solution matrix transposed before
51 // returning to the caller.
52 private final boolean transposed;
54 // The number of rows of internal matrices.
55 private final int rows;
57 // The number of columns of internal matrices.
58 private final int cols;
60 // The cost matrix, the cost of assigning each row index to column index.
61 private float[][] cost;
63 // Mask of zero cost assignment states.
64 private byte[][] mask;
66 // Covering some rows of the cost matrix.
67 private boolean[] rowsCovered;
69 // Covering some columns of the cost matrix.
70 private boolean[] colsCovered;
72 // The alternating path between starred zeroes and primed zeroes
73 private Deque<Pair<Integer, Integer>> path;
75 // The solution, marking which rows should be assigned to which columns. The
76 // positions of elements in this array correspond to the rows of the cost
77 // matrix, and the value of each element correspond to the columns of the cost
78 // matrix, i.e. assignments[i] = j indicates that row i should be assigned to
79 // column j.
80 private int[] assignments;
82 // Improvements described by Jin Kue Wong cache the least value in each row,
83 // as well as the column index of the least value in each row, and the pending
84 // adjustments to each row and each column.
85 private float[] leastInRow;
86 private int[] leastInRowIndex;
87 private float[] rowAdjust;
88 private float[] colAdjust;
90 /**
91 * Construct a new problem instance with the specified cost matrix. The cost
92 * matrix must be rectangular, though not necessarily square. If one dimension
93 * is greater than the other, some elements in the greater dimension will not
94 * be assigned. The input cost matrix will not be modified.
95 * @param costMatrix
97 public MunkresAssignment(float[][] costMatrix) {
98 // The algorithm assumes that the number of columns is at least as great as
99 // the number of rows. If this is not the case of the input matrix, then
100 // all internal structures must be transposed relative to the input.
101 this.transposed = costMatrix.length > costMatrix[0].length;
102 if (this.transposed) {
103 this.rows = costMatrix[0].length;
104 this.cols = costMatrix.length;
105 } else {
106 this.rows = costMatrix.length;
107 this.cols = costMatrix[0].length;
110 cost = new float[rows][cols];
111 mask = new byte[rows][cols];
112 rowsCovered = new boolean[rows];
113 colsCovered = new boolean[cols];
114 path = new LinkedList<>();
116 leastInRow = new float[rows];
117 leastInRowIndex = new int[rows];
118 rowAdjust = new float[rows];
119 colAdjust = new float[cols];
121 assignments = null;
123 // Copy cost matrix.
124 if (transposed) {
125 for (int r = 0; r < rows; r++) {
126 for (int c = 0; c < cols; c++) {
127 cost[r][c] = costMatrix[c][r];
130 } else {
131 for (int r = 0; r < rows; r++) {
132 System.arraycopy(costMatrix[r], 0, cost[r], 0, cols);
136 // Costs must be finite otherwise the matrix can get into a bad state where
137 // no progress can be made. If your use case depends on a distinction
138 // between costs of MAX_VALUE and POSITIVE_INFINITY, you're doing it wrong.
139 for (int r = 0; r < rows; r++) {
140 for (int c = 0; c < cols; c++) {
141 if (cost[r][c] == Float.POSITIVE_INFINITY) {
142 cost[r][c] = Float.MAX_VALUE;
149 * Get the optimal assignments. The returned array will have the same number
150 * of elements as the number of elements as the number of rows in the input
151 * cost matrix. Each element will indicate which column should be assigned to
152 * that row or -1 if no column should be assigned, i.e. if result[i] = j then
153 * row i should be assigned to column j. Subsequent invocations of this method
154 * will simply return the same object without additional computation.
155 * @return an array with the optimal assignments
157 public int[] solve() {
158 // If this assignment problem has already been solved, return the known
159 // solution
160 if (assignments != null) {
161 return assignments;
164 preliminaries();
166 // Find the optimal assignments.
167 while (!testIsDone()) {
168 while (!stepOne()) {
169 stepThree();
171 stepTwo();
174 // Extract the assignments from the mask matrix.
175 if (transposed) {
176 assignments = new int[cols];
177 outer:
178 for (int c = 0; c < cols; c++) {
179 for (int r = 0; r < rows; r++) {
180 if (mask[r][c] == STAR) {
181 assignments[c] = r;
182 continue outer;
185 // There is no assignment for this row of the input/output.
186 assignments[c] = -1;
188 } else {
189 assignments = new int[rows];
190 outer:
191 for (int r = 0; r < rows; r++) {
192 for (int c = 0; c < cols; c++) {
193 if (mask[r][c] == STAR) {
194 assignments[r] = c;
195 continue outer;
201 // Once the solution has been computed, there is no need to keep any of the
202 // other internal structures. Clear all unnecessary internal references so
203 // the garbage collector may reclaim that memory.
204 cost = null;
205 mask = null;
206 rowsCovered = null;
207 colsCovered = null;
208 path = null;
209 leastInRow = null;
210 leastInRowIndex = null;
211 rowAdjust = null;
212 colAdjust = null;
214 return assignments;
218 * Corresponds to the "preliminaries" step of the original algorithm.
219 * Guarantees that the matrix is an equivalent non-negative matrix with at
220 * least one zero in each row.
222 private void preliminaries() {
223 for (int r = 0; r < rows; r++) {
224 // Find the minimum cost of each row.
225 float min = Float.POSITIVE_INFINITY;
226 for (int c = 0; c < cols; c++) {
227 min = Math.min(min, cost[r][c]);
230 // Subtract that minimum cost from each element in the row.
231 for (int c = 0; c < cols; c++) {
232 cost[r][c] -= min;
234 // If the element is now zero and there are no zeroes in the same row
235 // or column which are already starred, then star this one. There
236 // must be at least one zero because of subtracting the min cost.
237 if (cost[r][c] == 0 && !rowsCovered[r] && !colsCovered[c]) {
238 mask[r][c] = STAR;
239 // Cover this row and column so that no other zeroes in them can be
240 // starred.
241 rowsCovered[r] = true;
242 colsCovered[c] = true;
247 // Clear the covered rows and columns.
248 Arrays.fill(rowsCovered, false);
249 Arrays.fill(colsCovered, false);
253 * Test whether the algorithm is done, i.e. we have the optimal assignment.
254 * This occurs when there is exactly one starred zero in each row.
255 * @return true if the algorithm is done
257 private boolean testIsDone() {
258 // Cover all columns containing a starred zero. There can be at most one
259 // starred zero per column. Therefore, a covered column has an optimal
260 // assignment.
261 for (int r = 0; r < rows; r++) {
262 for (int c = 0; c < cols; c++) {
263 if (mask[r][c] == STAR) {
264 colsCovered[c] = true;
269 // Count the total number of covered columns.
270 int coveredCols = 0;
271 for (int c = 0; c < cols; c++) {
272 coveredCols += colsCovered[c] ? 1 : 0;
275 // Apply an row and column adjustments that are pending.
276 for (int r = 0; r < rows; r++) {
277 for (int c = 0; c < cols; c++) {
278 cost[r][c] += rowAdjust[r];
279 cost[r][c] += colAdjust[c];
283 // Clear the pending row and column adjustments.
284 Arrays.fill(rowAdjust, 0);
285 Arrays.fill(colAdjust, 0);
287 // The covers on columns and rows may have been reset, recompute the least
288 // value for each row.
289 for (int r = 0; r < rows; r++) {
290 leastInRow[r] = Float.POSITIVE_INFINITY;
291 for (int c = 0; c < cols; c++) {
292 if (!rowsCovered[r] && !colsCovered[c] && cost[r][c] < leastInRow[r]) {
293 leastInRow[r] = cost[r][c];
294 leastInRowIndex[r] = c;
299 // If all columns are covered, then we are done. Since there may be more
300 // columns than rows, we are also done if the number of covered columns is
301 // at least as great as the number of rows.
302 return (coveredCols == cols || coveredCols >= rows);
306 * Corresponds to step 1 of the original algorithm.
307 * @return false if all zeroes are covered
309 private boolean stepOne() {
310 while (true) {
311 Pair<Integer, Integer> zero = findUncoveredZero();
312 if (zero == null) {
313 // No uncovered zeroes, need to manipulate the cost matrix in step
314 // three.
315 return false;
316 } else {
317 // Prime the uncovered zero and find a starred zero in the same row.
318 mask[zero.getFirst()][zero.getSecond()] = PRIME;
319 Pair<Integer, Integer> star = starInRow(zero.getFirst());
320 if (star != null) {
321 // Cover the row with both the newly primed zero and the starred zero.
322 // Since this is the only place where zeroes are primed, and we cover
323 // it here, and rows are only uncovered when primes are erased, then
324 // there can be at most one primed uncovered zero.
325 rowsCovered[star.getFirst()] = true;
326 colsCovered[star.getSecond()] = false;
327 updateMin(star.getFirst(), star.getSecond());
328 } else {
329 // Will go to step two after, where a path will be constructed,
330 // starting from the uncovered primed zero (there is only one). Since
331 // we have already found it, save it as the first node in the path.
332 path.clear();
333 path.offerLast(new Pair<>(zero.getFirst(), zero.getSecond()));
334 return true;
341 * Corresponds to step 2 of the original algorithm.
343 private void stepTwo() {
344 // Construct a path of alternating starred zeroes and primed zeroes, where
345 // each starred zero is in the same column as the previous primed zero, and
346 // each primed zero is in the same row as the previous starred zero. The
347 // path will always end in a primed zero.
348 while (true) {
349 Pair<Integer, Integer> star = starInCol(path.getLast().getSecond());
350 if (star != null) {
351 path.offerLast(star);
352 } else {
353 break;
355 Pair<Integer, Integer> prime = primeInRow(path.getLast().getFirst());
356 path.offerLast(prime);
359 // Augment path - unmask all starred zeroes and star all primed zeroes. All
360 // nodes in the path will be either starred or primed zeroes. The set of
361 // starred zeroes is independent and now one larger than before.
362 for (Pair<Integer, Integer> p : path) {
363 if (mask[p.getFirst()][p.getSecond()] == STAR) {
364 mask[p.getFirst()][p.getSecond()] = NONE;
365 } else {
366 mask[p.getFirst()][p.getSecond()] = STAR;
370 // Clear all covers from rows and columns.
371 Arrays.fill(rowsCovered, false);
372 Arrays.fill(colsCovered, false);
374 // Remove the prime mask from all primed zeroes.
375 for (int r = 0; r < rows; r++) {
376 for (int c = 0; c < cols; c++) {
377 if (mask[r][c] == PRIME) {
378 mask[r][c] = NONE;
385 * Corresponds to step 3 of the original algorithm.
387 private void stepThree() {
388 // Find the minimum uncovered cost.
389 float min = leastInRow[0];
390 for (int r = 1; r < rows; r++) {
391 if (leastInRow[r] < min) {
392 min = leastInRow[r];
396 // Add the minimum cost to each of the costs in a covered row, or subtract
397 // the minimum cost from each of the costs in an uncovered column. As an
398 // optimization, do not actually modify the cost matrix yet, but track the
399 // adjustments that need to be made to each row and column.
400 for (int r = 0; r < rows; r++) {
401 if (rowsCovered[r]) {
402 rowAdjust[r] += min;
405 for (int c = 0; c < cols; c++) {
406 if (!colsCovered[c]) {
407 colAdjust[c] -= min;
411 // Since the cost matrix is not being updated yet, the minimum uncovered
412 // cost per row must be updated.
413 for (int r = 0; r < rows; r++) {
414 if (!colsCovered[leastInRowIndex[r]]) {
415 // The least value in this row was in an uncovered column, meaning that
416 // it would have had the minimum value subtracted from it, and therefore
417 // will still be the minimum value in that row.
418 leastInRow[r] -= min;
419 } else {
420 // The least value in this row was in a covered column and would not
421 // have had the minimum value subtracted from it, so the minimum value
422 // could be some in another column.
423 for (int c = 0; c < cols; c++) {
424 if (cost[r][c] + colAdjust[c] + rowAdjust[r] < leastInRow[r]) {
425 leastInRow[r] = cost[r][c] + colAdjust[c] + rowAdjust[r];
426 leastInRowIndex[r] = c;
434 * Find a zero cost assignment which is not covered. If there are no zero cost
435 * assignments which are uncovered, then null will be returned.
436 * @return pair of row and column indices of an uncovered zero or null
438 private Pair<Integer, Integer> findUncoveredZero() {
439 for (int r = 0; r < rows; r++) {
440 if (leastInRow[r] == 0) {
441 return new Pair<>(r, leastInRowIndex[r]);
444 return null;
448 * A specified row has become covered, and a specified column has become
449 * uncovered. The least value per row may need to be updated.
450 * @param row the index of the row which was just covered
451 * @param col the index of the column which was just uncovered
453 private void updateMin(int row, int col) {
454 // If the row is covered we want to ignore it as far as least values go.
455 leastInRow[row] = Float.POSITIVE_INFINITY;
457 for (int r = 0; r < rows; r++) {
458 // Since the column has only just been uncovered, it could not have any
459 // pending adjustments. Only covered rows can have pending adjustments
460 // and covered costs do not count toward row minimums. Therefore, we do
461 // not need to consider rowAdjust[r] or colAdjust[col].
462 if (!rowsCovered[r] && cost[r][col] < leastInRow[r]) {
463 leastInRow[r] = cost[r][col];
464 leastInRowIndex[r] = col;
470 * Find a starred zero in a specified row. If there are no starred zeroes in
471 * the specified row, then null will be returned.
472 * @param r the index of the row to be searched
473 * @return pair of row and column indices of starred zero or null
475 private Pair<Integer, Integer> starInRow(int r) {
476 for (int c = 0; c < cols; c++) {
477 if (mask[r][c] == STAR) {
478 return new Pair<>(r, c);
481 return null;
485 * Find a starred zero in the specified column. If there are no starred zeroes
486 * in the specified row, then null will be returned.
487 * @param c the index of the column to be searched
488 * @return pair of row and column indices of starred zero or null
490 private Pair<Integer, Integer> starInCol(int c) {
491 for (int r = 0; r < rows; r++) {
492 if (mask[r][c] == STAR) {
493 return new Pair<>(r, c);
496 return null;
500 * Find a primed zero in the specified row. If there are no primed zeroes in
501 * the specified row, then null will be returned.
502 * @param r the index of the row to be searched
503 * @return pair of row and column indices of primed zero or null
505 private Pair<Integer, Integer> primeInRow(int r) {
506 for (int c = 0; c < cols; c++) {
507 if (mask[r][c] == PRIME) {
508 return new Pair<>(r, c);
511 return null;