usr/src/common/util/qsort.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28
  29 #if !defined(_KERNEL) && !defined(_KMDB)
  30 #include "lint.h"
  31 #endif /* !_KERNEL && !_KMDB */
  32
  33 #include <sys/types.h>
  34
  35 #if !defined(_KERNEL) && !defined(_KMDB)
  36 #include <stdlib.h>
  37 #include <synch.h>
  38 #endif /* !_KERNEL && !_KMDB */
  39
  40 #include "qsort.h"
  41
  42 static void swapp32(uint32_t *r1, uint32_t *r2, size_t cnt);
  43 static void swapp64(uint64_t *r1, uint64_t *r2, size_t cnt);
  44 static void swapi(uint32_t *r1, uint32_t *r2, size_t cnt);
  45 static void swapb(char *r1, char *r2, size_t cnt);
  46
  47 /*
  48  * choose a median of 3 values
  49  *
  50  * note: cstyle specifically prohibits nested conditional operators
  51  * but this is the only way to do the median of 3 function in-line
  52  */
  53 #define med3(a, b, c) (cmp((a), (b)) < 0) \
  54         ? ((cmp((b), (c)) < 0) ? (b) : (cmp((a), (c)) < 0) ? (c) : (a)) \
  55         : ((cmp((b), (c)) > 0) ? (b) : (cmp((a), (c)) > 0) ? (c) : (a))
  56
  57 #define THRESH_L        5       /* threshold for insertion sort */
  58 #define THRESH_M3       20      /* threshold for median of 3 */
  59 #define THRESH_M9       50      /* threshold for median of 9 */
  60
  61 typedef struct {
  62         char    *b_lim;
  63         size_t  nrec;
  64 } stk_t;
  65
  66 /*
  67  * qsort() is a general purpose, in-place sorting routine using a
  68  * user provided call back function for comparisons.  This implementation
  69  * utilizes a ternary quicksort algorithm, and cuts over to an
  70  * insertion sort for partitions involving fewer than THRESH_L records.
  71  *
  72  * Potential User Errors
  73  *   There is no return value from qsort, this function has no method
  74  *   of alerting the user that a sort did not work or could not work.
  75  *   We do not print an error message or exit the process or thread,
  76  *   Even if we can detect an error, We CANNOT silently return without
  77  *   sorting the data, if we did so the user could never be sure the
  78  *   sort completed successfully.
  79  *   It is possible we could change the return value of sort from void
  80  *   to int and return success or some error codes, but this gets into
  81  *   standards  and compatibility issues.
  82  *
  83  *   Examples of qsort parameter errors might be
  84  *   1) record size (rsiz) equal to 0
  85  *      qsort will loop and never return.
  86  *   2) record size (rsiz) less than 0
  87  *      rsiz is unsigned, so a negative value is insanely large
  88  *   3) number of records (nrec) is 0
  89  *      This is legal - qsort will return without examining any records
  90  *   4) number of records (nrec) is less than 0
  91  *      nrec is unsigned, so a negative value is insanely large.
  92  *   5) nrec * rsiz > memory allocation for sort array
  93  *      a segment violation may occur
  94  *      corruption of other memory may occur
  95  *   6) The base address of the sort array is invalid
  96  *      a segment violation may occur
  97  *      corruption of other memory may occur
  98  *   7) The user call back function is invalid
  99  *      we may get alignment errors or segment violations
 100  *      we may jump into never-never land
 101  *
 102  *   Some less obvious errors might be
 103  *   8) The user compare function is not comparing correctly
 104  *   9) The user compare function modifies the data records
 105  */
 106
 107 void
 108 qsort(
 109         void            *basep,
 110         size_t          nrec,
 111         size_t          rsiz,
 112         int             (*cmp)(const void *, const void *))
 113 {
 114         size_t          i;              /* temporary variable */
 115
 116         /* variables used by swap */
 117         void            (*swapf)(char *, char *, size_t);
 118         size_t          loops;
 119
 120         /* variables used by sort */
 121         stk_t           stack[8 * sizeof (nrec) + 1];
 122         stk_t           *sp;
 123         char            *b_lim;         /* bottom limit */
 124         char            *b_dup;         /* bottom duplicate */
 125         char            *b_par;         /* bottom partition */
 126         char            *t_lim;         /* top limit */
 127         char            *t_dup;         /* top duplicate */
 128         char            *t_par;         /* top partition */
 129         char            *m1, *m2, *m3;  /* median pointers */
 130         uintptr_t       d_bytelength;   /* byte length of duplicate records */
 131         int             b_nrec;
 132         int             t_nrec;
 133         int             cv;             /* results of compare (bottom / top) */
 134
 135         /*
 136          * choose a swap function based on alignment and size
 137          *
 138          * The qsort function sorts an array of fixed length records.
 139          * We have very limited knowledge about the data record itself.
 140          * It may be that the data record is in the array we are sorting
 141          * or it may be that the array contains pointers or indexes to
 142          * the actual data record and all that we are sorting is the indexes.
 143          *
 144          * The following decision will choose an optimal swap function
 145          * based on the size and alignment of the data records
 146          *   swapp64    will swap 64 bit pointers
 147          *   swapp32    will swap 32 bit pointers
 148          *   swapi      will swap an array of 32 bit integers
 149          *   swapb      will swap an array of 8 bit characters
 150          *
 151          * swapi and swapb will also require the variable loops to be set
 152          * to control the length of the array being swapped
 153          */
 154         if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) &&
 155             (rsiz == sizeof (uint64_t))) {
 156                 loops = 1;
 157                 swapf = (void (*)(char *, char *, size_t))swapp64;
 158         } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
 159             (rsiz == sizeof (uint32_t))) {
 160                 loops = 1;
 161                 swapf = (void (*)(char *, char *, size_t))swapp32;
 162         } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
 163             ((rsiz & (sizeof (uint32_t) - 1)) == 0)) {
 164                 loops = rsiz / sizeof (int);
 165                 swapf = (void (*)(char *, char *, size_t))swapi;
 166         } else {
 167                 loops = rsiz;
 168                 swapf = swapb;
 169         }
 170
 171         /*
 172          * qsort is a partitioning sort
 173          *
 174          * the stack is the bookkeeping mechanism to keep track of all
 175          * the partitions.
 176          *
 177          * each sort pass takes one partition and sorts it into two partitions.
 178          * at the top of the loop we simply take the partition on the top
 179          * of the stack and sort it. See the comments at the bottom
 180          * of the loop regarding which partitions to add in what order.
 181          *
 182          * initially put the whole partition on the stack
 183          */
 184         sp = stack;
 185         sp->b_lim = (char *)basep;
 186         sp->nrec = nrec;
 187         sp++;
 188         while (sp > stack) {
 189                 sp--;
 190                 b_lim = sp->b_lim;
 191                 nrec = sp->nrec;
 192
 193                 /*
 194                  * a linear insertion sort i faster than a qsort for
 195                  * very small number of records (THRESH_L)
 196                  *
 197                  * if number records < threshold use linear insertion sort
 198                  *
 199                  * this also handles the special case where the partition
 200                  * 0 or 1 records length.
 201                  */
 202                 if (nrec < THRESH_L) {
 203                         /*
 204                          * Linear insertion sort
 205                          */
 206                         t_par = b_lim;
 207                         for (i = 1; i < nrec; i++) {
 208                                 t_par += rsiz;
 209                                 b_par = t_par;
 210                                 while (b_par > b_lim) {
 211                                         b_par -= rsiz;
 212                                         if ((*cmp)(b_par, b_par + rsiz) <= 0) {
 213                                                 break;
 214                                         }
 215                                         (*swapf)(b_par, b_par + rsiz, loops);
 216                                 }
 217                         }
 218
 219                         /*
 220                          * a linear insertion sort will put all records
 221                          * in their final position and will not create
 222                          * subpartitions.
 223                          *
 224                          * therefore when the insertion sort is complete
 225                          * just go to the top of the loop and get the
 226                          * next partition to sort.
 227                          */
 228                         continue;
 229                 }
 230
 231                 /* quicksort */
 232
 233                 /*
 234                  * choose a pivot record
 235                  *
 236                  * Ideally the pivot record will divide the partition
 237                  * into two equal parts. however we have to balance the
 238                  * work involved in selecting the pivot record with the
 239                  * expected benefit.
 240                  *
 241                  * The choice of pivot record depends on the number of
 242                  * records in the partition
 243                  *
 244                  * for small partitions (nrec < THRESH_M3)
 245                  *   we just select the record in the middle of the partition
 246                  *
 247                  * if (nrec >= THRESH_M3 && nrec < THRESH_M9)
 248                  *   we select three values and choose the median of 3
 249                  *
 250                  * if (nrec >= THRESH_M9)
 251                  *   then we use an approximate median of 9
 252                  *   9 records are selected and grouped in 3 groups of 3
 253                  *   the median of each of these 3 groups is fed into another
 254                  *   median of 3 decision.
 255                  *
 256                  * Each median of 3 decision is 2 or 3 compares,
 257                  * so median of 9 costs between 8 and 12 compares.
 258                  *
 259                  * i is byte distance between two consecutive samples
 260                  * m2 will point to the pivot record
 261                  */
 262                 if (nrec < THRESH_M3) {
 263                         m2 = b_lim + (nrec / 2) * rsiz;
 264                 } else if (nrec < THRESH_M9) {
 265                         /* use median of 3 */
 266                         i = ((nrec - 1) / 2) * rsiz;
 267                         m2 = med3(b_lim, b_lim + i, b_lim + 2 * i);
 268                 } else {
 269                         /* approx median of 9 */
 270                         i = ((nrec - 1) / 8) * rsiz;
 271                         m1 = med3(b_lim, b_lim +  i, b_lim + 2 * i);
 272                         m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i);
 273                         m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i);
 274                         m2 = med3(m1, m2, m3);
 275                 }
 276
 277                 /*
 278                  * quick sort partitioning
 279                  *
 280                  * The partition limits are defined by bottom and top pointers
 281                  * b_lim and t_lim.
 282                  *
 283                  * qsort uses a fairly standard method of moving the
 284                  * partitioning pointers, b_par and t_par, to the middle of
 285                  * the partition and exchanging records that are in the
 286                  * wrong part of the partition.
 287                  *
 288                  * Two enhancements have been made to the basic algorithm.
 289                  * One for handling duplicate records and one to minimize
 290                  * the number of swaps.
 291                  *
 292                  * Two duplicate records pointers are (b_dup and t_dup) are
 293                  * initially set to b_lim and t_lim.  Each time a record
 294                  * whose sort key value is equal to the pivot record is found
 295                  * it will be swapped with the record pointed to by
 296                  * b_dup or t_dup and the duplicate pointer will be
 297                  * incremented toward the center.
 298                  * When partitioning is complete, all the duplicate records
 299                  * will have been collected at the upper and lower limits of
 300                  * the partition and can easily be moved adjacent to the
 301                  * pivot record.
 302                  *
 303                  * The second optimization is to minimize the number of swaps.
 304                  * The pointer m2 points to the pivot record.
 305                  * During partitioning, if m2 is ever equal to the partitioning
 306                  * pointers, b_par or t_par, then b_par or t_par just moves
 307                  * onto the next record without doing a compare.
 308                  * If as a result of duplicate record detection,
 309                  * b_dup or t_dup is ever equal to m2,
 310                  * then m2 is changed to point to the duplicate record and
 311                  * b_dup or t_dup is incremented with out swapping records.
 312                  *
 313                  * When partitioning is done, we may not have the same pivot
 314                  * record that we started with, but we will have one with
 315                  * an equal sort key.
 316                  */
 317                 b_dup = b_par           = b_lim;
 318                 t_dup = t_par = t_lim   = b_lim + rsiz * (nrec - 1);
 319                 for (;;) {
 320
 321                         /* move bottom pointer up */
 322                         for (; b_par <= t_par; b_par += rsiz) {
 323                                 if (b_par == m2) {
 324                                         continue;
 325                                 }
 326                                 cv = cmp(b_par, m2);
 327                                 if (cv > 0) {
 328                                         break;
 329                                 }
 330                                 if (cv == 0) {
 331                                         if (b_dup == m2) {
 332                                                 m2 = b_par;
 333                                         } else if (b_dup != b_par) {
 334                                                 (*swapf)(b_dup, b_par, loops);
 335                                         }
 336                                         b_dup += rsiz;
 337                                 }
 338                         }
 339
 340                         /* move top pointer down */
 341                         for (; b_par < t_par; t_par -= rsiz) {
 342                                 if (t_par == m2) {
 343                                         continue;
 344                                 }
 345                                 cv = cmp(t_par, m2);
 346                                 if (cv < 0) {
 347                                         break;
 348                                 }
 349                                 if (cv == 0) {
 350                                         if (t_dup == m2) {
 351                                                 m2 = t_par;
 352                                         } else if (t_dup != t_par) {
 353                                                 (*swapf)(t_dup, t_par, loops);
 354                                         }
 355                                         t_dup -= rsiz;
 356                                 }
 357                         }
 358
 359                         /* break if we are done partitioning */
 360                         if (b_par >= t_par) {
 361                                 break;
 362                         }
 363
 364                         /* exchange records at upper and lower break points */
 365                         (*swapf)(b_par, t_par, loops);
 366                         b_par += rsiz;
 367                         t_par -= rsiz;
 368                 }
 369
 370                 /*
 371                  * partitioning is now complete
 372                  *
 373                  * there are two termination conditions from the partitioning
 374                  * loop above.  Either b_par or t_par have crossed or
 375                  * they are equal.
 376                  *
 377                  * we need to swap the pivot record to its final position
 378                  * m2 could be in either the upper or lower partitions
 379                  * or it could already be in its final position
 380                  */
 381                 /*
 382                  * R[b_par] > R[m2]
 383                  * R[t_par] < R[m2]
 384                  */
 385                 if (t_par < b_par) {
 386                         if (m2 < t_par) {
 387                                 (*swapf)(m2, t_par, loops);
 388                                 m2 = b_par = t_par;
 389                         } else if (m2 > b_par) {
 390                                 (*swapf)(m2, b_par, loops);
 391                                 m2 = t_par = b_par;
 392                         } else {
 393                                 b_par = t_par = m2;
 394                         }
 395                 } else {
 396                         if (m2 < t_par) {
 397                                 t_par = b_par = t_par - rsiz;
 398                         }
 399                         if (m2 != b_par) {
 400                                 (*swapf)(m2, b_par, loops);
 401                         }
 402                         m2 = t_par;
 403                 }
 404
 405                 /*
 406                  * move bottom duplicates next to pivot
 407                  * optimized to eliminate overlap
 408                  */
 409                 d_bytelength = b_dup - b_lim;
 410                 if (b_par - b_dup < d_bytelength) {
 411                         b_dup = b_lim + (b_par - b_dup);
 412                 }
 413                 while (b_dup > b_lim) {
 414                         b_dup -= rsiz;
 415                         b_par -= rsiz;
 416                         (*swapf)(b_dup, b_par, loops);
 417                 }
 418                 b_par = m2 - d_bytelength;
 419
 420                 /*
 421                  * move top duplicates next to pivot
 422                  */
 423                 d_bytelength = t_lim - t_dup;
 424                 if (t_dup - t_par < d_bytelength) {
 425                         t_dup = t_lim - (t_dup - t_par);
 426                 }
 427                 while (t_dup < t_lim) {
 428                         t_dup += rsiz;
 429                         t_par += rsiz;
 430                         (*swapf)(t_dup, t_par, loops);
 431                 }
 432                 t_par = m2 + d_bytelength;
 433
 434                 /*
 435                  * when a qsort pass completes there are three partitions
 436                  * 1) the lower contains all records less than pivot
 437                  * 2) the upper contains all records greater than pivot
 438                  * 3) the pivot partition contains all record equal to pivot
 439                  *
 440                  * all records in the pivot partition are in their final
 441                  * position and do not need to be accounted for by the stack
 442                  *
 443                  * when adding partitions to the stack
 444                  * it is important to add the largest partition first
 445                  * to prevent stack overflow.
 446                  *
 447                  * calculate number of unsorted records in top and bottom
 448                  * push resulting partitions on stack
 449                  */
 450                 b_nrec = (b_par - b_lim) / rsiz;
 451                 t_nrec = (t_lim - t_par) / rsiz;
 452                 if (b_nrec < t_nrec) {
 453                         sp->b_lim = t_par + rsiz;
 454                         sp->nrec = t_nrec;
 455                         sp++;
 456                         sp->b_lim = b_lim;
 457                         sp->nrec = b_nrec;
 458                         sp++;
 459                 } else {
 460                         sp->b_lim = b_lim;
 461                         sp->nrec = b_nrec;
 462                         sp++;
 463                         sp->b_lim = t_par + rsiz;
 464                         sp->nrec = t_nrec;
 465                         sp++;
 466                 }
 467         }
 468 }
 469
 470 /*
 471  * The following swap functions should not create a stack frame
 472  * the SPARC call / return instruction will be executed
 473  * but the a save / restore will not be executed
 474  * which means we won't do a window turn with the spill / fill overhead
 475  * verify this by examining the assembly code
 476  */
 477
 478 /* ARGSUSED */
 479 static void
 480 swapp32(uint32_t *r1, uint32_t *r2, size_t cnt)
 481 {
 482         uint32_t temp;
 483
 484         temp = *r1;
 485         *r1++ = *r2;
 486         *r2++ = temp;
 487 }
 488
 489 /* ARGSUSED */
 490 static void
 491 swapp64(uint64_t *r1, uint64_t *r2, size_t cnt)
 492 {
 493         uint64_t temp;
 494
 495         temp = *r1;
 496         *r1++ = *r2;
 497         *r2++ = temp;
 498 }
 499
 500 static void
 501 swapi(uint32_t *r1, uint32_t *r2, size_t cnt)
 502 {
 503         uint32_t temp;
 504
 505         /* character by character */
 506         while (cnt--) {
 507                 temp = *r1;
 508                 *r1++ = *r2;
 509                 *r2++ = temp;
 510         }
 511 }
 512
 513 static void
 514 swapb(char *r1, char *r2, size_t cnt)
 515 {
 516         char    temp;
 517
 518         /* character by character */
 519         while (cnt--) {
 520                 temp = *r1;
 521                 *r1++ = *r2;
 522                 *r2++ = temp;
 523         }
 524 }