module/zfs/vdev_raidz_math_impl.h

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or https://opensource.org/licenses/CDDL-1.0.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
  23  */
  24
  25 #ifndef _VDEV_RAIDZ_MATH_IMPL_H
  26 #define _VDEV_RAIDZ_MATH_IMPL_H
  27
  28 #include <sys/types.h>
  29 #include <sys/vdev_raidz_impl.h>
  30
  31 #define raidz_inline inline __attribute__((always_inline))
  32 #ifndef noinline
  33 #define noinline __attribute__((noinline))
  34 #endif
  35
  36 /*
  37  * Functions calculate multiplication constants for data reconstruction.
  38  * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
  39  * used parity columns for reconstruction.
  40  * @rr                  RAIDZ row
  41  * @tgtidx              array of missing data indexes
  42  * @coeff               output array of coefficients. Array must be provided by
  43  *                      user and must hold minimum MUL_CNT values.
  44  */
  45 static noinline void
  46 raidz_rec_q_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
  47 {
  48         const unsigned ncols = rr->rr_cols;
  49         const unsigned x = tgtidx[TARGET_X];
  50
  51         coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
  52 }
  53
  54 static noinline void
  55 raidz_rec_r_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
  56 {
  57         const unsigned ncols = rr->rr_cols;
  58         const unsigned x = tgtidx[TARGET_X];
  59
  60         coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
  61 }
  62
  63 static noinline void
  64 raidz_rec_pq_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
  65 {
  66         const unsigned ncols = rr->rr_cols;
  67         const unsigned x = tgtidx[TARGET_X];
  68         const unsigned y = tgtidx[TARGET_Y];
  69         gf_t a, b, e;
  70
  71         a = gf_exp2(x + 255 - y);
  72         b = gf_exp2(255 - (ncols - x - 1));
  73         e = a ^ 0x01;
  74
  75         coeff[MUL_PQ_X] = gf_div(a, e);
  76         coeff[MUL_PQ_Y] = gf_div(b, e);
  77 }
  78
  79 static noinline void
  80 raidz_rec_pr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
  81 {
  82         const unsigned ncols = rr->rr_cols;
  83         const unsigned x = tgtidx[TARGET_X];
  84         const unsigned y = tgtidx[TARGET_Y];
  85
  86         gf_t a, b, e;
  87
  88         a = gf_exp4(x + 255 - y);
  89         b = gf_exp4(255 - (ncols - x - 1));
  90         e = a ^ 0x01;
  91
  92         coeff[MUL_PR_X] = gf_div(a, e);
  93         coeff[MUL_PR_Y] = gf_div(b, e);
  94 }
  95
  96 static noinline void
  97 raidz_rec_qr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
  98 {
  99         const unsigned ncols = rr->rr_cols;
 100         const unsigned x = tgtidx[TARGET_X];
 101         const unsigned y = tgtidx[TARGET_Y];
 102
 103         gf_t nx, ny, nxxy, nxyy, d;
 104
 105         nx = gf_exp2(ncols - x - 1);
 106         ny = gf_exp2(ncols - y - 1);
 107         nxxy = gf_mul(gf_mul(nx, nx), ny);
 108         nxyy = gf_mul(gf_mul(nx, ny), ny);
 109         d = nxxy ^ nxyy;
 110
 111         coeff[MUL_QR_XQ] = ny;
 112         coeff[MUL_QR_X] = gf_div(ny, d);
 113         coeff[MUL_QR_YQ] = nx;
 114         coeff[MUL_QR_Y] = gf_div(nx, d);
 115 }
 116
 117 static noinline void
 118 raidz_rec_pqr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 119 {
 120         const unsigned ncols = rr->rr_cols;
 121         const unsigned x = tgtidx[TARGET_X];
 122         const unsigned y = tgtidx[TARGET_Y];
 123         const unsigned z = tgtidx[TARGET_Z];
 124
 125         gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
 126
 127         nx = gf_exp2(ncols - x - 1);
 128         ny = gf_exp2(ncols - y - 1);
 129         nz = gf_exp2(ncols - z - 1);
 130
 131         nxx = gf_exp4(ncols - x - 1);
 132         nyy = gf_exp4(ncols - y - 1);
 133         nzz = gf_exp4(ncols - z - 1);
 134
 135         nyyz = gf_mul(gf_mul(ny, nz), ny);
 136         nyzz = gf_mul(nzz, ny);
 137
 138         xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
 139             gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^  nyzz;
 140
 141         yd = gf_inv(ny ^ nz);
 142
 143         coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
 144         coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
 145         coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
 146         coeff[MUL_PQR_YU] = nx;
 147         coeff[MUL_PQR_YP] = gf_mul(nz, yd);
 148         coeff[MUL_PQR_YQ] = yd;
 149 }
 150
 151 /*
 152  * Method for zeroing a buffer (can be implemented using SIMD).
 153  * This method is used by multiple for gen/rec functions.
 154  *
 155  * @dc          Destination buffer
 156  * @dsize       Destination buffer size
 157  * @private     Unused
 158  */
 159 static int
 160 raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
 161 {
 162         v_t *dst = (v_t *)dc;
 163         size_t i;
 164
 165         ZERO_DEFINE();
 166
 167         (void) private; /* unused */
 168
 169         ZERO(ZERO_D);
 170
 171         for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
 172                 STORE(dst + i, ZERO_D);
 173                 STORE(dst + i + ZERO_STRIDE, ZERO_D);
 174         }
 175
 176         return (0);
 177 }
 178
 179 #define raidz_zero(dabd, size)                                          \
 180 {                                                                       \
 181         abd_iterate_func(dabd, 0, size, raidz_zero_abd_cb, NULL);       \
 182 }
 183
 184 /*
 185  * Method for copying two buffers (can be implemented using SIMD).
 186  * This method is used by multiple for gen/rec functions.
 187  *
 188  * @dc          Destination buffer
 189  * @sc          Source buffer
 190  * @dsize       Destination buffer size
 191  * @ssize       Source buffer size
 192  * @private     Unused
 193  */
 194 static int
 195 raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
 196 {
 197         v_t *dst = (v_t *)dc;
 198         const v_t *src = (v_t *)sc;
 199         size_t i;
 200
 201         COPY_DEFINE();
 202
 203         (void) private; /* unused */
 204
 205         for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) {
 206                 LOAD(src + i, COPY_D);
 207                 STORE(dst + i, COPY_D);
 208
 209                 LOAD(src + i + COPY_STRIDE, COPY_D);
 210                 STORE(dst + i + COPY_STRIDE, COPY_D);
 211         }
 212
 213         return (0);
 214 }
 215
 216
 217 #define raidz_copy(dabd, sabd, size)                                    \
 218 {                                                                       \
 219         abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\
 220 }
 221
 222 /*
 223  * Method for adding (XORing) two buffers.
 224  * Source and destination are XORed together and result is stored in
 225  * destination buffer. This method is used by multiple for gen/rec functions.
 226  *
 227  * @dc          Destination buffer
 228  * @sc          Source buffer
 229  * @dsize       Destination buffer size
 230  * @ssize       Source buffer size
 231  * @private     Unused
 232  */
 233 static int
 234 raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
 235 {
 236         v_t *dst = (v_t *)dc;
 237         const v_t *src = (v_t *)sc;
 238         size_t i;
 239
 240         ADD_DEFINE();
 241
 242         (void) private; /* unused */
 243
 244         for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) {
 245                 LOAD(dst + i, ADD_D);
 246                 XOR_ACC(src + i, ADD_D);
 247                 STORE(dst + i, ADD_D);
 248
 249                 LOAD(dst + i + ADD_STRIDE, ADD_D);
 250                 XOR_ACC(src + i + ADD_STRIDE, ADD_D);
 251                 STORE(dst + i + ADD_STRIDE, ADD_D);
 252         }
 253
 254         return (0);
 255 }
 256
 257 #define raidz_add(dabd, sabd, size)                                     \
 258 {                                                                       \
 259         abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\
 260 }
 261
 262 /*
 263  * Method for multiplying a buffer with a constant in GF(2^8).
 264  * Symbols from buffer are multiplied by a constant and result is stored
 265  * back in the same buffer.
 266  *
 267  * @dc          In/Out data buffer.
 268  * @size        Size of the buffer
 269  * @private     pointer to the multiplication constant (unsigned)
 270  */
 271 static int
 272 raidz_mul_abd_cb(void *dc, size_t size, void *private)
 273 {
 274         const unsigned mul = *((unsigned *)private);
 275         v_t *d = (v_t *)dc;
 276         size_t i;
 277
 278         MUL_DEFINE();
 279
 280         for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) {
 281                 LOAD(d + i, MUL_D);
 282                 MUL(mul, MUL_D);
 283                 STORE(d + i, MUL_D);
 284
 285                 LOAD(d + i + MUL_STRIDE, MUL_D);
 286                 MUL(mul, MUL_D);
 287                 STORE(d + i + MUL_STRIDE, MUL_D);
 288         }
 289
 290         return (0);
 291 }
 292
 293
 294 /*
 295  * Syndrome generation/update macros
 296  *
 297  * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros
 298  */
 299 #define P_D_SYNDROME(D, T, t)           \
 300 {                                       \
 301         LOAD((t), T);                   \
 302         XOR(D, T);                      \
 303         STORE((t), T);                  \
 304 }
 305
 306 #define Q_D_SYNDROME(D, T, t)           \
 307 {                                       \
 308         LOAD((t), T);                   \
 309         MUL2(T);                        \
 310         XOR(D, T);                      \
 311         STORE((t), T);                  \
 312 }
 313
 314 #define Q_SYNDROME(T, t)                \
 315 {                                       \
 316         LOAD((t), T);                   \
 317         MUL2(T);                        \
 318         STORE((t), T);                  \
 319 }
 320
 321 #define R_D_SYNDROME(D, T, t)           \
 322 {                                       \
 323         LOAD((t), T);                   \
 324         MUL4(T);                        \
 325         XOR(D, T);                      \
 326         STORE((t), T);                  \
 327 }
 328
 329 #define R_SYNDROME(T, t)                \
 330 {                                       \
 331         LOAD((t), T);                   \
 332         MUL4(T);                        \
 333         STORE((t), T);                  \
 334 }
 335
 336
 337 /*
 338  * PARITY CALCULATION
 339  *
 340  * Macros *_SYNDROME are used for parity/syndrome calculation.
 341  * *_D_SYNDROME() macros are used to calculate syndrome between 0 and
 342  * length of data column, and *_SYNDROME() macros are only for updating
 343  * the parity/syndrome if data column is shorter.
 344  *
  345  * P parity is calculated using raidz_copy() and raidz_add().
 346  */
 347
 348 /*
 349  * Generate P parity (RAIDZ1)
 350  *
 351  * @rr  RAIDZ row
 352  */
 353 static raidz_inline void
 354 raidz_generate_p_impl(raidz_row_t * const rr)
 355 {
 356         size_t c;
 357         const size_t ncols = rr->rr_cols;
 358         const size_t psize = rr->rr_col[CODE_P].rc_size;
 359         abd_t *pabd = rr->rr_col[CODE_P].rc_abd;
 360         size_t size;
 361         abd_t *dabd;
 362
 363         raidz_math_begin();
 364
 365         /* start with first data column */
 366         raidz_copy(pabd, rr->rr_col[1].rc_abd, psize);
 367
 368         for (c = 2; c < ncols; c++) {
 369                 dabd = rr->rr_col[c].rc_abd;
 370                 size = rr->rr_col[c].rc_size;
 371
 372                 /* add data column */
 373                 raidz_add(pabd, dabd, size);
 374         }
 375
 376         raidz_math_end();
 377 }
 378
 379
 380 /*
 381  * Generate PQ parity (RAIDZ2)
 382  * The function is called per data column.
 383  *
 384  * @c           array of pointers to parity (code) columns
 385  * @dc          pointer to data column
 386  * @csize       size of parity columns
 387  * @dsize       size of data column
 388  */
 389 static void
 390 raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
 391     const size_t dsize)
 392 {
 393         v_t *p = (v_t *)c[0];
 394         v_t *q = (v_t *)c[1];
 395         const v_t *d = (const v_t *)dc;
 396         const v_t * const dend = d + (dsize / sizeof (v_t));
 397         const v_t * const qend = q + (csize / sizeof (v_t));
 398
 399         GEN_PQ_DEFINE();
 400
 401         MUL2_SETUP();
 402
 403         for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE,
 404             q += GEN_PQ_STRIDE) {
 405                 LOAD(d, GEN_PQ_D);
 406                 P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p);
 407                 Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q);
 408         }
 409         for (; q < qend; q += GEN_PQ_STRIDE) {
 410                 Q_SYNDROME(GEN_PQ_C, q);
 411         }
 412 }
 413
 414
 415 /*
 416  * Generate PQ parity (RAIDZ2)
 417  *
 418  * @rr  RAIDZ row
 419  */
 420 static raidz_inline void
 421 raidz_generate_pq_impl(raidz_row_t * const rr)
 422 {
 423         size_t c;
 424         const size_t ncols = rr->rr_cols;
 425         const size_t csize = rr->rr_col[CODE_P].rc_size;
 426         size_t dsize;
 427         abd_t *dabd;
 428         abd_t *cabds[] = {
 429                 rr->rr_col[CODE_P].rc_abd,
 430                 rr->rr_col[CODE_Q].rc_abd
 431         };
 432
 433         raidz_math_begin();
 434
 435         raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, csize);
 436         raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, csize);
 437
 438         for (c = 3; c < ncols; c++) {
 439                 dabd = rr->rr_col[c].rc_abd;
 440                 dsize = rr->rr_col[c].rc_size;
 441
 442                 abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
 443                     raidz_gen_pq_add);
 444         }
 445
 446         raidz_math_end();
 447 }
 448
 449
 450 /*
 451  * Generate PQR parity (RAIDZ3)
 452  * The function is called per data column.
 453  *
 454  * @c           array of pointers to parity (code) columns
 455  * @dc          pointer to data column
 456  * @csize       size of parity columns
 457  * @dsize       size of data column
 458  */
 459 static void
 460 raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
 461     const size_t dsize)
 462 {
 463         v_t *p = (v_t *)c[CODE_P];
 464         v_t *q = (v_t *)c[CODE_Q];
 465         v_t *r = (v_t *)c[CODE_R];
 466         const v_t *d = (const v_t *)dc;
 467         const v_t * const dend = d + (dsize / sizeof (v_t));
 468         const v_t * const qend = q + (csize / sizeof (v_t));
 469
 470         GEN_PQR_DEFINE();
 471
 472         MUL2_SETUP();
 473
 474         for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE,
 475             q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
 476                 LOAD(d, GEN_PQR_D);
 477                 P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p);
 478                 Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q);
 479                 R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r);
 480         }
 481         for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
 482                 Q_SYNDROME(GEN_PQR_C, q);
 483                 R_SYNDROME(GEN_PQR_C, r);
 484         }
 485 }
 486
 487
 488 /*
 489  * Generate PQR parity (RAIDZ3)
 490  *
 491  * @rr  RAIDZ row
 492  */
 493 static raidz_inline void
 494 raidz_generate_pqr_impl(raidz_row_t * const rr)
 495 {
 496         size_t c;
 497         const size_t ncols = rr->rr_cols;
 498         const size_t csize = rr->rr_col[CODE_P].rc_size;
 499         size_t dsize;
 500         abd_t *dabd;
 501         abd_t *cabds[] = {
 502                 rr->rr_col[CODE_P].rc_abd,
 503                 rr->rr_col[CODE_Q].rc_abd,
 504                 rr->rr_col[CODE_R].rc_abd
 505         };
 506
 507         raidz_math_begin();
 508
 509         raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, csize);
 510         raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, csize);
 511         raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, csize);
 512
 513         for (c = 4; c < ncols; c++) {
 514                 dabd = rr->rr_col[c].rc_abd;
 515                 dsize = rr->rr_col[c].rc_size;
 516
 517                 abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
 518                     raidz_gen_pqr_add);
 519         }
 520
 521         raidz_math_end();
 522 }
 523
 524
 525 /*
 526  * DATA RECONSTRUCTION
 527  *
 528  * Data reconstruction process consists of two phases:
 529  *      - Syndrome calculation
 530  *      - Data reconstruction
 531  *
 532  * Syndrome is calculated by generating parity using available data columns
 533  * and zeros in places of erasure. Existing parity is added to corresponding
 534  * syndrome value to obtain the [P|Q|R]syn values from equation:
 535  *      P = Psyn + Dx + Dy + Dz
 536  *      Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
 537  *      R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
 538  *
 539  * For data reconstruction phase, the corresponding equations are solved
 540  * for missing data (Dx, Dy, Dz). This generally involves multiplying known
  541  * symbols by a coefficient and adding them together. The multiplication
  542  * constant coefficients are calculated ahead of the operation in
  543  * raidz_rec_[q|r|pq|pr|qr|pqr]_coeff() functions.
 544  *
 545  * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
 546  * and "short" columns.
 547  * For this reason, reconstruction is performed in minimum of
 548  * two steps. First, from offset 0 to short_size, then from short_size to
  549  * big_size. Calculation functions REC_[*]_BLOCK() are implemented to work
 550  * over both ranges. The split also enables removal of conditional expressions
 551  * from loop bodies, improving throughput of SIMD implementations.
 552  * For the best performance, all functions marked with raidz_inline attribute
 553  * must be inlined by compiler.
 554  *
 555  *    parity          data
 556  *    columns         columns
 557  * <----------> <------------------>
 558  *                   x       y  <----+ missing columns (x, y)
 559  *                   |       |
 560  * +---+---+---+---+-v-+---+-v-+---+   ^ 0
 561  * |   |   |   |   |   |   |   |   |   |
 562  * |   |   |   |   |   |   |   |   |   |
 563  * | P | Q | R | D | D | D | D | D |   |
 564  * |   |   |   | 0 | 1 | 2 | 3 | 4 |   |
 565  * |   |   |   |   |   |   |   |   |   v
 566  * |   |   |   |   |   +---+---+---+   ^ short_size
 567  * |   |   |   |   |   |               |
 568  * +---+---+---+---+---+               v big_size
 569  * <------------------> <---------->
 570  *      big columns     short columns
 571  *
 572  */
 573
 574
 575
 576
 577 /*
 578  * Reconstruct single data column using P parity
 579  *
 580  * @syn_method  raidz_add_abd()
 581  * @rec_method  not applicable
 582  *
 583  * @rr          RAIDZ row
 584  * @tgtidx      array of missing data indexes
 585  */
 586 static raidz_inline int
 587 raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx)
 588 {
 589         size_t c;
 590         const size_t firstdc = rr->rr_firstdatacol;
 591         const size_t ncols = rr->rr_cols;
 592         const size_t x = tgtidx[TARGET_X];
 593         const size_t xsize = rr->rr_col[x].rc_size;
 594         abd_t *xabd = rr->rr_col[x].rc_abd;
 595         size_t size;
 596         abd_t *dabd;
 597
 598         if (xabd == NULL)
 599                 return (1 << CODE_P);
 600
 601         raidz_math_begin();
 602
 603         /* copy P into target */
 604         raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, xsize);
 605
 606         /* generate p_syndrome */
 607         for (c = firstdc; c < ncols; c++) {
 608                 if (c == x)
 609                         continue;
 610
 611                 dabd = rr->rr_col[c].rc_abd;
 612                 size = MIN(rr->rr_col[c].rc_size, xsize);
 613
 614                 raidz_add(xabd, dabd, size);
 615         }
 616
 617         raidz_math_end();
 618
 619         return (1 << CODE_P);
 620 }
 621
 622
 623 /*
 624  * Generate Q syndrome (Qsyn)
 625  *
 626  * @xc          array of pointers to syndrome columns
 627  * @dc          data column (NULL if missing)
 628  * @xsize       size of syndrome columns
 629  * @dsize       size of data column (0 if missing)
 630  */
 631 static void
 632 raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
 633     const size_t dsize)
 634 {
 635         v_t *x = (v_t *)xc[TARGET_X];
 636         const v_t *d = (const v_t *)dc;
 637         const v_t * const dend = d + (dsize / sizeof (v_t));
 638         const v_t * const xend = x + (xsize / sizeof (v_t));
 639
 640         SYN_Q_DEFINE();
 641
 642         MUL2_SETUP();
 643
 644         for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
 645                 LOAD(d, SYN_Q_D);
 646                 Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x);
 647         }
 648         for (; x < xend; x += SYN_STRIDE) {
 649                 Q_SYNDROME(SYN_Q_X, x);
 650         }
 651 }
 652
 653
 654 /*
 655  * Reconstruct single data column using Q parity
 656  *
 657  * @syn_method  raidz_add_abd()
 658  * @rec_method  raidz_mul_abd_cb()
 659  *
 660  * @rr          RAIDZ row
 661  * @tgtidx      array of missing data indexes
 662  */
 663 static raidz_inline int
 664 raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
 665 {
 666         size_t c;
 667         size_t dsize;
 668         abd_t *dabd;
 669         const size_t firstdc = rr->rr_firstdatacol;
 670         const size_t ncols = rr->rr_cols;
 671         const size_t x = tgtidx[TARGET_X];
 672         abd_t *xabd = rr->rr_col[x].rc_abd;
 673         const size_t xsize = rr->rr_col[x].rc_size;
 674         abd_t *tabds[] = { xabd };
 675
 676         if (xabd == NULL)
 677                 return (1 << CODE_Q);
 678
 679         unsigned coeff[MUL_CNT];
 680         raidz_rec_q_coeff(rr, tgtidx, coeff);
 681
 682         raidz_math_begin();
 683
 684         /* Start with first data column if present */
 685         if (firstdc != x) {
 686                 raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
 687         } else {
 688                 raidz_zero(xabd, xsize);
 689         }
 690
 691         /* generate q_syndrome */
 692         for (c = firstdc+1; c < ncols; c++) {
 693                 if (c == x) {
 694                         dabd = NULL;
 695                         dsize = 0;
 696                 } else {
 697                         dabd = rr->rr_col[c].rc_abd;
 698                         dsize = rr->rr_col[c].rc_size;
 699                 }
 700
 701                 abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
 702                     raidz_syn_q_abd);
 703         }
 704
 705         /* add Q to the syndrome */
 706         raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, xsize);
 707
 708         /* transform the syndrome */
 709         abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
 710
 711         raidz_math_end();
 712
 713         return (1 << CODE_Q);
 714 }
 715
 716
 717 /*
 718  * Generate R syndrome (Rsyn)
 719  *
 720  * @xc          array of pointers to syndrome columns
 721  * @dc          data column (NULL if missing)
 722  * @tsize       size of syndrome columns
 723  * @dsize       size of data column (0 if missing)
 724  */
 725 static void
 726 raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
 727     const size_t dsize)
 728 {
 729         v_t *x = (v_t *)xc[TARGET_X];
 730         const v_t *d = (const v_t *)dc;
 731         const v_t * const dend = d + (dsize / sizeof (v_t));
 732         const v_t * const xend = x + (tsize / sizeof (v_t));
 733
 734         SYN_R_DEFINE();
 735
 736         MUL2_SETUP();
 737
 738         for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
 739                 LOAD(d, SYN_R_D);
 740                 R_D_SYNDROME(SYN_R_D, SYN_R_X, x);
 741         }
 742         for (; x < xend; x += SYN_STRIDE) {
 743                 R_SYNDROME(SYN_R_X, x);
 744         }
 745 }
 746
 747
 748 /*
 749  * Reconstruct single data column using R parity
 750  *
 751  * @syn_method  raidz_add_abd()
 752  * @rec_method  raidz_mul_abd_cb()
 753  *
 754  * @rr          RAIDZ rr
 755  * @tgtidx      array of missing data indexes
 756  */
 757 static raidz_inline int
 758 raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
 759 {
 760         size_t c;
 761         size_t dsize;
 762         abd_t *dabd;
 763         const size_t firstdc = rr->rr_firstdatacol;
 764         const size_t ncols = rr->rr_cols;
 765         const size_t x = tgtidx[TARGET_X];
 766         const size_t xsize = rr->rr_col[x].rc_size;
 767         abd_t *xabd = rr->rr_col[x].rc_abd;
 768         abd_t *tabds[] = { xabd };
 769
 770         if (xabd == NULL)
 771                 return (1 << CODE_R);
 772
 773         unsigned coeff[MUL_CNT];
 774         raidz_rec_r_coeff(rr, tgtidx, coeff);
 775
 776         raidz_math_begin();
 777
 778         /* Start with first data column if present */
 779         if (firstdc != x) {
 780                 raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
 781         } else {
 782                 raidz_zero(xabd, xsize);
 783         }
 784
 785
 786         /* generate q_syndrome */
 787         for (c = firstdc+1; c < ncols; c++) {
 788                 if (c == x) {
 789                         dabd = NULL;
 790                         dsize = 0;
 791                 } else {
 792                         dabd = rr->rr_col[c].rc_abd;
 793                         dsize = rr->rr_col[c].rc_size;
 794                 }
 795
 796                 abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
 797                     raidz_syn_r_abd);
 798         }
 799
 800         /* add R to the syndrome */
 801         raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, xsize);
 802
 803         /* transform the syndrome */
 804         abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
 805
 806         raidz_math_end();
 807
 808         return (1 << CODE_R);
 809 }
 810
 811
 812 /*
 813  * Generate P and Q syndromes
 814  *
 815  * @xc          array of pointers to syndrome columns
 816  * @dc          data column (NULL if missing)
 817  * @tsize       size of syndrome columns
 818  * @dsize       size of data column (0 if missing)
 819  */
 820 static void
 821 raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize,
 822     const size_t dsize)
 823 {
 824         v_t *x = (v_t *)tc[TARGET_X];
 825         v_t *y = (v_t *)tc[TARGET_Y];
 826         const v_t *d = (const v_t *)dc;
 827         const v_t * const dend = d + (dsize / sizeof (v_t));
 828         const v_t * const yend = y + (tsize / sizeof (v_t));
 829
 830         SYN_PQ_DEFINE();
 831
 832         MUL2_SETUP();
 833
 834         for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
 835                 LOAD(d, SYN_PQ_D);
 836                 P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x);
 837                 Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y);
 838         }
 839         for (; y < yend; y += SYN_STRIDE) {
 840                 Q_SYNDROME(SYN_PQ_X, y);
 841         }
 842 }
 843
 844 /*
 845  * Reconstruct data using PQ parity and PQ syndromes
 846  *
 847  * @tc          syndrome/result columns
 848  * @tsize       size of syndrome/result columns
 849  * @c           parity columns
 850  * @mul         array of multiplication constants
 851  */
 852 static void
 853 raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
 854     const unsigned *mul)
 855 {
 856         v_t *x = (v_t *)tc[TARGET_X];
 857         v_t *y = (v_t *)tc[TARGET_Y];
 858         const v_t * const xend = x + (tsize / sizeof (v_t));
 859         const v_t *p = (v_t *)c[CODE_P];
 860         const v_t *q = (v_t *)c[CODE_Q];
 861
 862         REC_PQ_DEFINE();
 863
 864         for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE,
 865             p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) {
 866                 LOAD(x, REC_PQ_X);
 867                 LOAD(y, REC_PQ_Y);
 868
 869                 XOR_ACC(p, REC_PQ_X);
 870                 XOR_ACC(q, REC_PQ_Y);
 871
 872                 /* Save Pxy */
 873                 COPY(REC_PQ_X,  REC_PQ_T);
 874
 875                 /* Calc X */
 876                 MUL(mul[MUL_PQ_X], REC_PQ_X);
 877                 MUL(mul[MUL_PQ_Y], REC_PQ_Y);
 878                 XOR(REC_PQ_Y,  REC_PQ_X);
 879                 STORE(x, REC_PQ_X);
 880
 881                 /* Calc Y */
 882                 XOR(REC_PQ_T,  REC_PQ_X);
 883                 STORE(y, REC_PQ_X);
 884         }
 885 }
 886
 887
 888 /*
 889  * Reconstruct two data columns using PQ parity
 890  *
 891  * @syn_method  raidz_syn_pq_abd()
 892  * @rec_method  raidz_rec_pq_abd()
 893  *
 894  * @rr          RAIDZ row
 895  * @tgtidx      array of missing data indexes
 896  */
 897 static raidz_inline int
 898 raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
 899 {
 900         size_t c;
 901         size_t dsize;
 902         abd_t *dabd;
 903         const size_t firstdc = rr->rr_firstdatacol;
 904         const size_t ncols = rr->rr_cols;
 905         const size_t x = tgtidx[TARGET_X];
 906         const size_t y = tgtidx[TARGET_Y];
 907         const size_t xsize = rr->rr_col[x].rc_size;
 908         const size_t ysize = rr->rr_col[y].rc_size;
 909         abd_t *xabd = rr->rr_col[x].rc_abd;
 910         abd_t *yabd = rr->rr_col[y].rc_abd;
 911         abd_t *tabds[2] = { xabd, yabd };
 912         abd_t *cabds[] = {
 913                 rr->rr_col[CODE_P].rc_abd,
 914                 rr->rr_col[CODE_Q].rc_abd
 915         };
 916
 917         if (xabd == NULL)
 918                 return ((1 << CODE_P) | (1 << CODE_Q));
 919
 920         unsigned coeff[MUL_CNT];
 921         raidz_rec_pq_coeff(rr, tgtidx, coeff);
 922
 923         /*
 924          * Check if some of targets is shorter then others
 925          * In this case, shorter target needs to be replaced with
 926          * new buffer so that syndrome can be calculated.
 927          */
 928         if (ysize < xsize) {
 929                 yabd = abd_alloc(xsize, B_FALSE);
 930                 tabds[1] = yabd;
 931         }
 932
 933         raidz_math_begin();
 934
 935         /* Start with first data column if present */
 936         if (firstdc != x) {
 937                 raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
 938                 raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
 939         } else {
 940                 raidz_zero(xabd, xsize);
 941                 raidz_zero(yabd, xsize);
 942         }
 943
 944         /* generate q_syndrome */
 945         for (c = firstdc+1; c < ncols; c++) {
 946                 if (c == x || c == y) {
 947                         dabd = NULL;
 948                         dsize = 0;
 949                 } else {
 950                         dabd = rr->rr_col[c].rc_abd;
 951                         dsize = rr->rr_col[c].rc_size;
 952                 }
 953
 954                 abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
 955                     raidz_syn_pq_abd);
 956         }
 957
 958         abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff);
 959
 960         /* Copy shorter targets back to the original abd buffer */
 961         if (ysize < xsize)
 962                 raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
 963
 964         raidz_math_end();
 965
 966         if (ysize < xsize)
 967                 abd_free(yabd);
 968
 969         return ((1 << CODE_P) | (1 << CODE_Q));
 970 }
 971
 972
/*
 * Generate P and R syndromes
 *
 * Folds one data column into the two target (syndrome) columns. While data
 * is available both targets are updated; once the data column is exhausted
 * only the R target (c[TARGET_Y]) keeps being processed, up to tsize.
 *
 * @c           array of pointers to syndrome columns
 * @dc          data column (NULL if missing)
 * @tsize       size of syndrome columns
 * @dsize       size of data column (0 if missing)
 */
static void
raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize,
    const size_t dsize)
{
        v_t *x = (v_t *)c[TARGET_X];
        v_t *y = (v_t *)c[TARGET_Y];
        const v_t *d = (const v_t *)dc;
        /* End markers, expressed in v_t elements rather than bytes */
        const v_t * const dend = d + (dsize / sizeof (v_t));
        const v_t * const yend = y + (tsize / sizeof (v_t));

        /*
         * Supplied by the including implementation; presumably declares the
         * working variables referenced through SYN_PR_D and SYN_PR_X.
         */
        SYN_PR_DEFINE();

        MUL2_SETUP();

        /* Data present: fold d into both the P (x) and the R (y) syndrome */
        for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
                LOAD(d, SYN_PR_D);
                P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x);
                R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y);
        }
        /*
         * Data exhausted (short or missing column): only the R syndrome is
         * processed for the remainder; x is not touched in this loop.
         */
        for (; y < yend; y += SYN_STRIDE) {
                R_SYNDROME(SYN_PR_X, y);
        }
}
1004
/*
 * Reconstruct data using PR parity and PR syndromes
 *
 * On entry the target columns hold the syndromes; on exit they hold the
 * reconstructed data (each element is loaded, combined and stored back).
 *
 * @t           syndrome/result columns
 * @tsize       size of syndrome/result columns
 * @c           parity columns
 * @mul         array of multiplication constants
 */
static void
raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
    const unsigned *mul)
{
        v_t *x = (v_t *)t[TARGET_X];
        v_t *y = (v_t *)t[TARGET_Y];
        const v_t * const xend = x + (tsize / sizeof (v_t));
        const v_t *p = (v_t *)c[CODE_P];
        /*
         * NOTE(review): the PR driver in this file builds its parity array
         * as { P, R }, so this entry is presumably the R column despite the
         * local name and the CODE_Q index -- confirm CODE_Q selects entry 1.
         */
        const v_t *q = (v_t *)c[CODE_Q];

        REC_PR_DEFINE();

        for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE,
            p += REC_PR_STRIDE, q += REC_PR_STRIDE) {
                /* Combine each syndrome with its stored parity */
                LOAD(x, REC_PR_X);
                LOAD(y, REC_PR_Y);
                XOR_ACC(p, REC_PR_X);
                XOR_ACC(q, REC_PR_Y);

                /* Save Pxy */
                COPY(REC_PR_X,  REC_PR_T);

                /* Calc X */
                MUL(mul[MUL_PR_X], REC_PR_X);
                MUL(mul[MUL_PR_Y], REC_PR_Y);
                XOR(REC_PR_Y,  REC_PR_X);
                STORE(x, REC_PR_X);

                /* Calc Y: XOR the saved Pxy into X and store the result */
                XOR(REC_PR_T,  REC_PR_X);
                STORE(y, REC_PR_X);
        }
}
1046
1047
1048 /*
1049  * Reconstruct two data columns using PR parity
1050  *
1051  * @syn_method  raidz_syn_pr_abd()
1052  * @rec_method  raidz_rec_pr_abd()
1053  *
1054  * @rr          RAIDZ row
1055  * @tgtidx      array of missing data indexes
1056  */
1057 static raidz_inline int
1058 raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
1059 {
1060         size_t c;
1061         size_t dsize;
1062         abd_t *dabd;
1063         const size_t firstdc = rr->rr_firstdatacol;
1064         const size_t ncols = rr->rr_cols;
1065         const size_t x = tgtidx[0];
1066         const size_t y = tgtidx[1];
1067         const size_t xsize = rr->rr_col[x].rc_size;
1068         const size_t ysize = rr->rr_col[y].rc_size;
1069         abd_t *xabd = rr->rr_col[x].rc_abd;
1070         abd_t *yabd = rr->rr_col[y].rc_abd;
1071         abd_t *tabds[2] = { xabd, yabd };
1072         abd_t *cabds[] = {
1073                 rr->rr_col[CODE_P].rc_abd,
1074                 rr->rr_col[CODE_R].rc_abd
1075         };
1076
1077         if (xabd == NULL)
1078                 return ((1 << CODE_P) | (1 << CODE_R));
1079
1080         unsigned coeff[MUL_CNT];
1081         raidz_rec_pr_coeff(rr, tgtidx, coeff);
1082
1083         /*
1084          * Check if some of targets are shorter then others.
1085          * They need to be replaced with a new buffer so that syndrome can
1086          * be calculated on full length.
1087          */
1088         if (ysize < xsize) {
1089                 yabd = abd_alloc(xsize, B_FALSE);
1090                 tabds[1] = yabd;
1091         }
1092
1093         raidz_math_begin();
1094
1095         /* Start with first data column if present */
1096         if (firstdc != x) {
1097                 raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
1098                 raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
1099         } else {
1100                 raidz_zero(xabd, xsize);
1101                 raidz_zero(yabd, xsize);
1102         }
1103
1104         /* generate q_syndrome */
1105         for (c = firstdc+1; c < ncols; c++) {
1106                 if (c == x || c == y) {
1107                         dabd = NULL;
1108                         dsize = 0;
1109                 } else {
1110                         dabd = rr->rr_col[c].rc_abd;
1111                         dsize = rr->rr_col[c].rc_size;
1112                 }
1113
1114                 abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
1115                     raidz_syn_pr_abd);
1116         }
1117
1118         abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff);
1119
1120         /*
1121          * Copy shorter targets back to the original abd buffer
1122          */
1123         if (ysize < xsize)
1124                 raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
1125
1126         raidz_math_end();
1127
1128         if (ysize < xsize)
1129                 abd_free(yabd);
1130
1131         return ((1 << CODE_P) | (1 << CODE_R));
1132 }
1133
1134
1135 /*
1136  * Generate Q and R syndromes
1137  *
1138  * @xc          array of pointers to syndrome columns
1139  * @dc          data column (NULL if missing)
1140  * @tsize       size of syndrome columns
1141  * @dsize       size of data column (0 if missing)
1142  */
1143 static void
1144 raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize,
1145     const size_t dsize)
1146 {
1147         v_t *x = (v_t *)c[TARGET_X];
1148         v_t *y = (v_t *)c[TARGET_Y];
1149         const v_t * const xend = x + (tsize / sizeof (v_t));
1150         const v_t *d = (const v_t *)dc;
1151         const v_t * const dend = d + (dsize / sizeof (v_t));
1152
1153         SYN_QR_DEFINE();
1154
1155         MUL2_SETUP();
1156
1157         for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
1158                 LOAD(d, SYN_PQ_D);
1159                 Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x);
1160                 R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y);
1161         }
1162         for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) {
1163                 Q_SYNDROME(SYN_QR_X, x);
1164                 R_SYNDROME(SYN_QR_X, y);
1165         }
1166 }
1167
1168
/*
 * Reconstruct data using QR parity and QR syndromes
 *
 * On entry the target columns hold the syndromes; on exit they hold the
 * reconstructed data (each element is loaded, combined and stored back).
 *
 * @t           syndrome/result columns
 * @tsize       size of syndrome/result columns
 * @c           parity columns
 * @mul         array of multiplication constants
 */
static void
raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
    const unsigned *mul)
{
        v_t *x = (v_t *)t[TARGET_X];
        v_t *y = (v_t *)t[TARGET_Y];
        const v_t * const xend = x + (tsize / sizeof (v_t));
        /*
         * NOTE(review): the QR driver in this file builds its parity array
         * as { Q, R }, so 'p' and 'q' presumably point at the Q and R
         * columns respectively despite their names and the CODE_P/CODE_Q
         * indexes -- confirm these select entries 0 and 1.
         */
        const v_t *p = (v_t *)c[CODE_P];
        const v_t *q = (v_t *)c[CODE_Q];

        REC_QR_DEFINE();

        for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE,
            p += REC_QR_STRIDE, q += REC_QR_STRIDE) {
                LOAD(x, REC_QR_X);
                LOAD(y, REC_QR_Y);

                /* Combine each syndrome with its stored parity */
                XOR_ACC(p, REC_QR_X);
                XOR_ACC(q, REC_QR_Y);

                /* Save the first combined value (X) for the Y calculation */
                COPY(REC_QR_X,  REC_QR_T);

                /* Calc X */
                MUL(mul[MUL_QR_XQ], REC_QR_X);  /* X = Q * xqm */
                XOR(REC_QR_Y, REC_QR_X);        /* X = R ^ X   */
                MUL(mul[MUL_QR_X], REC_QR_X);   /* X = X * xm  */
                STORE(x, REC_QR_X);

                /* Calc Y, same steps on the saved copy with the Y constants */
                MUL(mul[MUL_QR_YQ], REC_QR_T);  /* T = Q * yqm */
                XOR(REC_QR_Y, REC_QR_T);        /* T = R ^ T   */
                MUL(mul[MUL_QR_Y], REC_QR_T);   /* T = T * ym  */
                STORE(y, REC_QR_T);
        }
}
1213
1214
1215 /*
1216  * Reconstruct two data columns using QR parity
1217  *
1218  * @syn_method  raidz_syn_qr_abd()
1219  * @rec_method  raidz_rec_qr_abd()
1220  *
1221  * @rr          RAIDZ row
1222  * @tgtidx      array of missing data indexes
1223  */
1224 static raidz_inline int
1225 raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
1226 {
1227         size_t c;
1228         size_t dsize;
1229         abd_t *dabd;
1230         const size_t firstdc = rr->rr_firstdatacol;
1231         const size_t ncols = rr->rr_cols;
1232         const size_t x = tgtidx[TARGET_X];
1233         const size_t y = tgtidx[TARGET_Y];
1234         const size_t xsize = rr->rr_col[x].rc_size;
1235         const size_t ysize = rr->rr_col[y].rc_size;
1236         abd_t *xabd = rr->rr_col[x].rc_abd;
1237         abd_t *yabd = rr->rr_col[y].rc_abd;
1238         abd_t *tabds[2] = { xabd, yabd };
1239         abd_t *cabds[] = {
1240                 rr->rr_col[CODE_Q].rc_abd,
1241                 rr->rr_col[CODE_R].rc_abd
1242         };
1243
1244         if (xabd == NULL)
1245                 return ((1 << CODE_Q) | (1 << CODE_R));
1246
1247         unsigned coeff[MUL_CNT];
1248         raidz_rec_qr_coeff(rr, tgtidx, coeff);
1249
1250         /*
1251          * Check if some of targets is shorter then others
1252          * In this case, shorter target needs to be replaced with
1253          * new buffer so that syndrome can be calculated.
1254          */
1255         if (ysize < xsize) {
1256                 yabd = abd_alloc(xsize, B_FALSE);
1257                 tabds[1] = yabd;
1258         }
1259
1260         raidz_math_begin();
1261
1262         /* Start with first data column if present */
1263         if (firstdc != x) {
1264                 raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
1265                 raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
1266         } else {
1267                 raidz_zero(xabd, xsize);
1268                 raidz_zero(yabd, xsize);
1269         }
1270
1271         /* generate q_syndrome */
1272         for (c = firstdc+1; c < ncols; c++) {
1273                 if (c == x || c == y) {
1274                         dabd = NULL;
1275                         dsize = 0;
1276                 } else {
1277                         dabd = rr->rr_col[c].rc_abd;
1278                         dsize = rr->rr_col[c].rc_size;
1279                 }
1280
1281                 abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
1282                     raidz_syn_qr_abd);
1283         }
1284
1285         abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff);
1286
1287         /*
1288          * Copy shorter targets back to the original abd buffer
1289          */
1290         if (ysize < xsize)
1291                 raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
1292
1293         raidz_math_end();
1294
1295         if (ysize < xsize)
1296                 abd_free(yabd);
1297
1298
1299         return ((1 << CODE_Q) | (1 << CODE_R));
1300 }
1301
1302
1303 /*
1304  * Generate P, Q, and R syndromes
1305  *
1306  * @xc          array of pointers to syndrome columns
1307  * @dc          data column (NULL if missing)
1308  * @tsize       size of syndrome columns
1309  * @dsize       size of data column (0 if missing)
1310  */
1311 static void
1312 raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize,
1313     const size_t dsize)
1314 {
1315         v_t *x = (v_t *)c[TARGET_X];
1316         v_t *y = (v_t *)c[TARGET_Y];
1317         v_t *z = (v_t *)c[TARGET_Z];
1318         const v_t * const yend = y + (tsize / sizeof (v_t));
1319         const v_t *d = (const v_t *)dc;
1320         const v_t * const dend = d + (dsize / sizeof (v_t));
1321
1322         SYN_PQR_DEFINE();
1323
1324         MUL2_SETUP();
1325
1326         for (; d < dend;  d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE,
1327             z += SYN_STRIDE) {
1328                 LOAD(d, SYN_PQR_D);
1329                 P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x)
1330                 Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y);
1331                 R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z);
1332         }
1333         for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) {
1334                 Q_SYNDROME(SYN_PQR_X, y);
1335                 R_SYNDROME(SYN_PQR_X, z);
1336         }
1337 }
1338
1339
/*
 * Reconstruct data using PQR parity and PQR syndromes
 *
 * On entry the three target columns hold the syndromes; on exit they hold
 * the reconstructed data (each element is loaded, combined and stored back).
 * X is computed first, Y is derived using X, and Z is derived using Y.
 *
 * @t           syndrome/result columns
 * @tsize       size of syndrome/result columns
 * @c           parity columns
 * @mul         array of multiplication constants
 */
static void
raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
    const unsigned * const mul)
{
        v_t *x = (v_t *)t[TARGET_X];
        v_t *y = (v_t *)t[TARGET_Y];
        v_t *z = (v_t *)t[TARGET_Z];
        const v_t * const xend = x + (tsize / sizeof (v_t));
        const v_t *p = (v_t *)c[CODE_P];
        const v_t *q = (v_t *)c[CODE_Q];
        const v_t *r = (v_t *)c[CODE_R];

        REC_PQR_DEFINE();

        for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE,
            z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE,
            r += REC_PQR_STRIDE) {
                LOAD(x, REC_PQR_X);
                LOAD(y, REC_PQR_Y);
                LOAD(z, REC_PQR_Z);

                /* Combine each syndrome with its stored parity */
                XOR_ACC(p, REC_PQR_X);
                XOR_ACC(q, REC_PQR_Y);
                XOR_ACC(r, REC_PQR_Z);

                /* Save Pxyz and Qxyz */
                COPY(REC_PQR_X, REC_PQR_XS);
                COPY(REC_PQR_Y, REC_PQR_YS);

                /* Calc X */
                MUL(mul[MUL_PQR_XP], REC_PQR_X);        /* Xp = Pxyz * xp   */
                MUL(mul[MUL_PQR_XQ], REC_PQR_Y);        /* Xq = Qxyz * xq   */
                XOR(REC_PQR_Y, REC_PQR_X);
                MUL(mul[MUL_PQR_XR], REC_PQR_Z);        /* Xr = Rxyz * xr   */
                XOR(REC_PQR_Z, REC_PQR_X);              /* X = Xp + Xq + Xr */
                STORE(x, REC_PQR_X);

                /* Calc Y */
                XOR(REC_PQR_X, REC_PQR_XS);             /* Pyz = Pxyz + X */
                MUL(mul[MUL_PQR_YU], REC_PQR_X);        /* Xq = X * upd_q */
                XOR(REC_PQR_X, REC_PQR_YS);             /* Qyz = Qxyz + Xq */
                COPY(REC_PQR_XS, REC_PQR_X);            /* restore Pyz */
                MUL(mul[MUL_PQR_YP], REC_PQR_X);        /* Yp = Pyz * yp */
                MUL(mul[MUL_PQR_YQ], REC_PQR_YS);       /* Yq = Qyz * yq */
                XOR(REC_PQR_X, REC_PQR_YS);             /* Y = Yp + Yq */
                STORE(y, REC_PQR_YS);

                /* Calc Z */
                XOR(REC_PQR_XS, REC_PQR_YS);            /* Z = Pz = Pyz + Y */
                STORE(z, REC_PQR_YS);
        }
}
1400
1401
1402 /*
1403  * Reconstruct three data columns using PQR parity
1404  *
1405  * @syn_method  raidz_syn_pqr_abd()
1406  * @rec_method  raidz_rec_pqr_abd()
1407  *
1408  * @rr          RAIDZ row
1409  * @tgtidx      array of missing data indexes
1410  */
1411 static raidz_inline int
1412 raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
1413 {
1414         size_t c;
1415         size_t dsize;
1416         abd_t *dabd;
1417         const size_t firstdc = rr->rr_firstdatacol;
1418         const size_t ncols = rr->rr_cols;
1419         const size_t x = tgtidx[TARGET_X];
1420         const size_t y = tgtidx[TARGET_Y];
1421         const size_t z = tgtidx[TARGET_Z];
1422         const size_t xsize = rr->rr_col[x].rc_size;
1423         const size_t ysize = rr->rr_col[y].rc_size;
1424         const size_t zsize = rr->rr_col[z].rc_size;
1425         abd_t *xabd = rr->rr_col[x].rc_abd;
1426         abd_t *yabd = rr->rr_col[y].rc_abd;
1427         abd_t *zabd = rr->rr_col[z].rc_abd;
1428         abd_t *tabds[] = { xabd, yabd, zabd };
1429         abd_t *cabds[] = {
1430                 rr->rr_col[CODE_P].rc_abd,
1431                 rr->rr_col[CODE_Q].rc_abd,
1432                 rr->rr_col[CODE_R].rc_abd
1433         };
1434
1435         if (xabd == NULL)
1436                 return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1437
1438         unsigned coeff[MUL_CNT];
1439         raidz_rec_pqr_coeff(rr, tgtidx, coeff);
1440
1441         /*
1442          * Check if some of targets is shorter then others
1443          * In this case, shorter target needs to be replaced with
1444          * new buffer so that syndrome can be calculated.
1445          */
1446         if (ysize < xsize) {
1447                 yabd = abd_alloc(xsize, B_FALSE);
1448                 tabds[1] = yabd;
1449         }
1450         if (zsize < xsize) {
1451                 zabd = abd_alloc(xsize, B_FALSE);
1452                 tabds[2] = zabd;
1453         }
1454
1455         raidz_math_begin();
1456
1457         /* Start with first data column if present */
1458         if (firstdc != x) {
1459                 raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
1460                 raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
1461                 raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, xsize);
1462         } else {
1463                 raidz_zero(xabd, xsize);
1464                 raidz_zero(yabd, xsize);
1465                 raidz_zero(zabd, xsize);
1466         }
1467
1468         /* generate q_syndrome */
1469         for (c = firstdc+1; c < ncols; c++) {
1470                 if (c == x || c == y || c == z) {
1471                         dabd = NULL;
1472                         dsize = 0;
1473                 } else {
1474                         dabd = rr->rr_col[c].rc_abd;
1475                         dsize = rr->rr_col[c].rc_size;
1476                 }
1477
1478                 abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
1479                     raidz_syn_pqr_abd);
1480         }
1481
1482         abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff);
1483
1484         /*
1485          * Copy shorter targets back to the original abd buffer
1486          */
1487         if (ysize < xsize)
1488                 raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
1489         if (zsize < xsize)
1490                 raidz_copy(rr->rr_col[z].rc_abd, zabd, zsize);
1491
1492         raidz_math_end();
1493
1494         if (ysize < xsize)
1495                 abd_free(yabd);
1496         if (zsize < xsize)
1497                 abd_free(zabd);
1498
1499         return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1500 }
1501
1502 #endif /* _VDEV_RAIDZ_MATH_IMPL_H */