module/os/linux/zfs/qat_compress.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

#if defined(_KERNEL) && defined(HAVE_QAT)
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/completion.h>
#include <sys/zfs_context.h>
#include <sys/byteorder.h>
#include <sys/zio.h>
#include <sys/qat.h>

/*
 * Max instances in a QAT device; each instance is a channel to submit
 * jobs to the QAT hardware. This is only for pre-allocating the instance
 * and session arrays; the actual number of instances is defined in the
 * QAT driver's configuration file.
 */
#define	QAT_DC_MAX_INSTANCES	48

/*
 * ZLIB head and foot size
 */
#define	ZLIB_HEAD_SZ	2
#define	ZLIB_FOOT_SZ	4
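
/*
 * Note: these sizes correspond to standard zlib (RFC 1950) framing: a
 * 2-byte header and a 4-byte Adler-32 trailer wrapped around the raw
 * DEFLATE stream produced by the hardware.
 */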

static CpaInstanceHandle dc_inst_handles[QAT_DC_MAX_INSTANCES];
static CpaDcSessionHandle session_handles[QAT_DC_MAX_INSTANCES];
static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES];
static Cpa16U num_inst = 0;
static Cpa32U inst_num = 0;
static boolean_t qat_dc_init_done = B_FALSE;
int zfs_qat_compress_disable = 0;
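
/*
 * Decide whether a request should be offloaded to QAT: the module
 * parameter must not disable it, the hardware must have been
 * initialized, and the source length must fall within the buffer size
 * window the accelerator is configured for.
 */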
boolean_t
qat_dc_use_accel(size_t s_len)
{
	return (!zfs_qat_compress_disable &&
	    qat_dc_init_done &&
	    s_len >= QAT_MIN_BUF_SIZE &&
	    s_len <= QAT_MAX_BUF_SIZE);
}
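
/*
 * Completion callback registered with the QAT session; the driver
 * invokes it when an asynchronous (de)compression request finishes,
 * waking the thread blocked in wait_for_completion() below.
 */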
static void
qat_dc_callback(void *p_callback, CpaStatus status)
{
	if (p_callback != NULL)
		complete((struct completion *)p_callback);
}

static void
qat_dc_clean(void)
{
	Cpa16U buff_num = 0;
	Cpa16U num_inter_buff_lists = 0;

	for (Cpa16U i = 0; i < num_inst; i++) {
		cpaDcStopInstance(dc_inst_handles[i]);
		QAT_PHYS_CONTIG_FREE(session_handles[i]);
		/* free intermediate buffers */
		if (buffer_array[i] != NULL) {
			cpaDcGetNumIntermediateBuffers(
			    dc_inst_handles[i], &num_inter_buff_lists);
			for (buff_num = 0; buff_num < num_inter_buff_lists;
			    buff_num++) {
				CpaBufferList *buffer_inter =
				    buffer_array[i][buff_num];
				if (buffer_inter->pBuffers) {
					QAT_PHYS_CONTIG_FREE(
					    buffer_inter->pBuffers->pData);
					QAT_PHYS_CONTIG_FREE(
					    buffer_inter->pBuffers);
				}
				QAT_PHYS_CONTIG_FREE(
				    buffer_inter->pPrivateMetaData);
				QAT_PHYS_CONTIG_FREE(buffer_inter);
			}
		}
	}

	num_inst = 0;
	qat_dc_init_done = B_FALSE;
}

int
qat_dc_init(void)
{
	CpaStatus status = CPA_STATUS_SUCCESS;
	Cpa32U sess_size = 0;
	Cpa32U ctx_size = 0;
	Cpa16U num_inter_buff_lists = 0;
	Cpa16U buff_num = 0;
	Cpa32U buff_meta_size = 0;
	CpaDcSessionSetupData sd = {0};

	if (qat_dc_init_done)
		return (0);

	status = cpaDcGetNumInstances(&num_inst);
	if (status != CPA_STATUS_SUCCESS)
		return (-1);

	/* if the user has configured no QAT compression units just return */
	if (num_inst == 0)
		return (0);

	if (num_inst > QAT_DC_MAX_INSTANCES)
		num_inst = QAT_DC_MAX_INSTANCES;

	status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]);
	if (status != CPA_STATUS_SUCCESS)
		return (-1);
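
	/*
	 * Per-instance setup: register the virtual-to-physical address
	 * translation callback, pre-allocate the intermediate buffers the
	 * compression engine uses (required for dynamic Huffman
	 * compression, sized per the comment below), start the instance,
	 * and create a session on it.
	 */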
	for (Cpa16U i = 0; i < num_inst; i++) {
		cpaDcSetAddressTranslation(dc_inst_handles[i],
		    (void*)virt_to_phys);

		status = cpaDcBufferListGetMetaSize(dc_inst_handles[i],
		    1, &buff_meta_size);

		if (status == CPA_STATUS_SUCCESS)
			status = cpaDcGetNumIntermediateBuffers(
			    dc_inst_handles[i], &num_inter_buff_lists);

		if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0)
			status = QAT_PHYS_CONTIG_ALLOC(&buffer_array[i],
			    num_inter_buff_lists *
			    sizeof (CpaBufferList *));

		for (buff_num = 0; buff_num < num_inter_buff_lists;
		    buff_num++) {
			if (status == CPA_STATUS_SUCCESS)
				status = QAT_PHYS_CONTIG_ALLOC(
				    &buffer_array[i][buff_num],
				    sizeof (CpaBufferList));

			if (status == CPA_STATUS_SUCCESS)
				status = QAT_PHYS_CONTIG_ALLOC(
				    &buffer_array[i][buff_num]->
				    pPrivateMetaData,
				    buff_meta_size);

			if (status == CPA_STATUS_SUCCESS)
				status = QAT_PHYS_CONTIG_ALLOC(
				    &buffer_array[i][buff_num]->pBuffers,
				    sizeof (CpaFlatBuffer));

			if (status == CPA_STATUS_SUCCESS) {
				/*
				 * implementation requires an intermediate
				 * buffer approximately twice the size of
				 * output buffer, which is 2x max buffer
				 * size here.
				 */
				status = QAT_PHYS_CONTIG_ALLOC(
				    &buffer_array[i][buff_num]->pBuffers->
				    pData, 2 * QAT_MAX_BUF_SIZE);
				if (status != CPA_STATUS_SUCCESS)
					goto fail;

				buffer_array[i][buff_num]->numBuffers = 1;
				buffer_array[i][buff_num]->pBuffers->
				    dataLenInBytes = 2 * QAT_MAX_BUF_SIZE;
			}
		}

		status = cpaDcStartInstance(dc_inst_handles[i],
		    num_inter_buff_lists, buffer_array[i]);
		if (status != CPA_STATUS_SUCCESS)
			goto fail;
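
		/*
		 * Stateless, combined (compress + decompress) DEFLATE
		 * session: level 1 favors speed, dynamic Huffman tables
		 * improve the ratio, and Adler-32 is selected so the
		 * checksum matches the zlib-style framing added in
		 * qat_compress_impl() below.
		 */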
		sd.compLevel = CPA_DC_L1;
		sd.compType = CPA_DC_DEFLATE;
		sd.huffType = CPA_DC_HT_FULL_DYNAMIC;
		sd.sessDirection = CPA_DC_DIR_COMBINED;
		sd.sessState = CPA_DC_STATELESS;
		sd.deflateWindowSize = 7;
		sd.checksum = CPA_DC_ADLER32;
		status = cpaDcGetSessionSize(dc_inst_handles[i],
		    &sd, &sess_size, &ctx_size);
		if (status != CPA_STATUS_SUCCESS)
			goto fail;

		QAT_PHYS_CONTIG_ALLOC(&session_handles[i], sess_size);
		if (session_handles[i] == NULL)
			goto fail;

		status = cpaDcInitSession(dc_inst_handles[i],
		    session_handles[i],
		    &sd, NULL, qat_dc_callback);
		if (status != CPA_STATUS_SUCCESS)
			goto fail;
	}

	qat_dc_init_done = B_TRUE;
	return (0);
fail:
	qat_dc_clean();
	return (-1);
}

void
qat_dc_fini(void)
{
	if (!qat_dc_init_done)
		return;

	qat_dc_clean();
}

/*
 * The "add" parameter is an additional buffer which is passed
 * to QAT as a scratch buffer alongside the destination buffer
 * in case the "compressed" data ends up being larger than the
 * original source data. This is necessary to prevent QAT from
 * generating buffer overflow warnings for incompressible data.
 */
static int
qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len,
    char *dst, int dst_len, char *add, int add_len, size_t *c_len)
{
	CpaInstanceHandle dc_inst_handle;
	CpaDcSessionHandle session_handle;
	CpaBufferList *buf_list_src = NULL;
	CpaBufferList *buf_list_dst = NULL;
	CpaFlatBuffer *flat_buf_src = NULL;
	CpaFlatBuffer *flat_buf_dst = NULL;
	Cpa8U *buffer_meta_src = NULL;
	Cpa8U *buffer_meta_dst = NULL;
	Cpa32U buffer_meta_size = 0;
	CpaDcRqResults dc_results;
	CpaStatus status = CPA_STATUS_FAIL;
	Cpa32U hdr_sz = 0;
	Cpa32U compressed_sz;
	Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 2;
	Cpa32U num_dst_buf = (dst_len >> PAGE_SHIFT) + 2;
	Cpa32U num_add_buf = (add_len >> PAGE_SHIFT) + 2;
	Cpa32U bytes_left;
	Cpa32U dst_pages = 0;
	Cpa32U adler32 = 0;
	char *data;
	struct page *page;
	struct page **in_pages = NULL;
	struct page **out_pages = NULL;
	struct page **add_pages = NULL;
	Cpa32U page_off = 0;
	struct completion complete;
	Cpa32U page_num = 0;
	Cpa16U i;

	/*
	 * We increment num_src_buf and num_dst_buf by 2 to allow
	 * us to handle non page-aligned buffer addresses and buffers
	 * whose sizes are not divisible by PAGE_SIZE.
	 */
	Cpa32U src_buffer_list_mem_size = sizeof (CpaBufferList) +
	    (num_src_buf * sizeof (CpaFlatBuffer));
	Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) +
	    ((num_dst_buf + num_add_buf) * sizeof (CpaFlatBuffer));

	status = QAT_PHYS_CONTIG_ALLOC(&in_pages,
	    num_src_buf * sizeof (struct page *));
	if (status != CPA_STATUS_SUCCESS)
		goto fail;

	status = QAT_PHYS_CONTIG_ALLOC(&out_pages,
	    num_dst_buf * sizeof (struct page *));
	if (status != CPA_STATUS_SUCCESS)
		goto fail;

	status = QAT_PHYS_CONTIG_ALLOC(&add_pages,
	    num_add_buf * sizeof (struct page *));
	if (status != CPA_STATUS_SUCCESS)
		goto fail;
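
	/*
	 * Spread requests across the available instances in round-robin
	 * fashion; atomic_inc_32_nv() keeps the counter consistent when
	 * multiple threads submit jobs concurrently.
	 */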
	i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
	dc_inst_handle = dc_inst_handles[i];
	session_handle = session_handles[i];

	cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf,
	    &buffer_meta_size);
	status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size);
	if (status != CPA_STATUS_SUCCESS)
		goto fail;

	cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf + num_add_buf,
	    &buffer_meta_size);
	status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size);
	if (status != CPA_STATUS_SUCCESS)
		goto fail;

	/* build source buffer list */
	status = QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size);
	if (status != CPA_STATUS_SUCCESS)
		goto fail;

	flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1);

	buf_list_src->pBuffers = flat_buf_src; /* always point to first one */

	/* build destination buffer list */
	status = QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size);
	if (status != CPA_STATUS_SUCCESS)
		goto fail;

	flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);

	buf_list_dst->pBuffers = flat_buf_dst; /* always point to first one */
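
	/*
	 * The three loops below walk the source, destination, and scratch
	 * buffers one page at a time: each page is resolved to a struct
	 * page, mapped with kmap(), and described by one CpaFlatBuffer
	 * entry so the hardware can DMA to/from it. The mapped pages are
	 * remembered so they can be kunmap()'d at "fail:" below.
	 */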
	buf_list_src->numBuffers = 0;
	buf_list_src->pPrivateMetaData = buffer_meta_src;
	bytes_left = src_len;
	data = src;
	page_num = 0;
	while (bytes_left > 0) {
		page_off = ((long)data & ~PAGE_MASK);
		page = qat_mem_to_page(data);
		in_pages[page_num] = page;
		flat_buf_src->pData = kmap(page) + page_off;
		flat_buf_src->dataLenInBytes =
		    min((long)PAGE_SIZE - page_off, (long)bytes_left);

		bytes_left -= flat_buf_src->dataLenInBytes;
		data += flat_buf_src->dataLenInBytes;
		flat_buf_src++;
		buf_list_src->numBuffers++;
		page_num++;
	}

	buf_list_dst->numBuffers = 0;
	buf_list_dst->pPrivateMetaData = buffer_meta_dst;
	bytes_left = dst_len;
	data = dst;
	page_num = 0;
	while (bytes_left > 0) {
		page_off = ((long)data & ~PAGE_MASK);
		page = qat_mem_to_page(data);
		flat_buf_dst->pData = kmap(page) + page_off;
		out_pages[page_num] = page;
		flat_buf_dst->dataLenInBytes =
		    min((long)PAGE_SIZE - page_off, (long)bytes_left);

		bytes_left -= flat_buf_dst->dataLenInBytes;
		data += flat_buf_dst->dataLenInBytes;
		flat_buf_dst++;
		buf_list_dst->numBuffers++;
		page_num++;
		dst_pages++;
	}

	/* map additional scratch pages into the destination buffer list */
	bytes_left = add_len;
	data = add;
	page_num = 0;
	while (bytes_left > 0) {
		page_off = ((long)data & ~PAGE_MASK);
		page = qat_mem_to_page(data);
		flat_buf_dst->pData = kmap(page) + page_off;
		add_pages[page_num] = page;
		flat_buf_dst->dataLenInBytes =
		    min((long)PAGE_SIZE - page_off, (long)bytes_left);

		bytes_left -= flat_buf_dst->dataLenInBytes;
		data += flat_buf_dst->dataLenInBytes;
		flat_buf_dst++;
		buf_list_dst->numBuffers++;
		page_num++;
	}

	init_completion(&complete);

	if (dir == QAT_COMPRESS) {
		QAT_STAT_BUMP(comp_requests);
		QAT_STAT_INCR(comp_total_in_bytes, src_len);
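
		/*
		 * cpaDcGenerateHeader() writes the zlib-style stream header
		 * into the first destination buffer and returns its size in
		 * hdr_sz; advance pData and shrink dataLenInBytes so the
		 * compressed payload lands right after it.
		 */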
		cpaDcGenerateHeader(session_handle,
		    buf_list_dst->pBuffers, &hdr_sz);
		buf_list_dst->pBuffers->pData += hdr_sz;
		buf_list_dst->pBuffers->dataLenInBytes -= hdr_sz;
		status = cpaDcCompressData(
		    dc_inst_handle, session_handle,
		    buf_list_src, buf_list_dst,
		    &dc_results, CPA_DC_FLUSH_FINAL,
		    &complete);
		if (status != CPA_STATUS_SUCCESS) {
			goto fail;
		}

		/* we now wait until the completion of the operation. */
		wait_for_completion(&complete);

		if (dc_results.status != CPA_STATUS_SUCCESS) {
			status = CPA_STATUS_FAIL;
			goto fail;
		}

		compressed_sz = dc_results.produced;
		if (compressed_sz + hdr_sz + ZLIB_FOOT_SZ > dst_len) {
			status = CPA_STATUS_INCOMPRESSIBLE;
			goto fail;
		}

		/* get adler32 checksum and append footer */
		*(Cpa32U*)(dst + hdr_sz + compressed_sz) =
		    BSWAP_32(dc_results.checksum);

		*c_len = hdr_sz + compressed_sz + ZLIB_FOOT_SZ;
		QAT_STAT_INCR(comp_total_out_bytes, *c_len);
	} else {
		ASSERT3U(dir, ==, QAT_DECOMPRESS);
		QAT_STAT_BUMP(decomp_requests);
		QAT_STAT_INCR(decomp_total_in_bytes, src_len);
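
		/*
		 * Skip the 2-byte zlib header so the hardware sees a raw
		 * DEFLATE stream; the Adler-32 footer that follows the
		 * consumed data is checked against the checksum the
		 * hardware computed once the request completes.
		 */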
		buf_list_src->pBuffers->pData += ZLIB_HEAD_SZ;
		buf_list_src->pBuffers->dataLenInBytes -= ZLIB_HEAD_SZ;
		status = cpaDcDecompressData(dc_inst_handle, session_handle,
		    buf_list_src, buf_list_dst, &dc_results, CPA_DC_FLUSH_FINAL,
		    &complete);

		if (CPA_STATUS_SUCCESS != status) {
			status = CPA_STATUS_FAIL;
			goto fail;
		}

		/* we now wait until the completion of the operation. */
		wait_for_completion(&complete);

		if (dc_results.status != CPA_STATUS_SUCCESS) {
			status = CPA_STATUS_FAIL;
			goto fail;
		}

		/* verify adler checksum */
		adler32 = *(Cpa32U *)(src + dc_results.consumed + ZLIB_HEAD_SZ);
		if (adler32 != BSWAP_32(dc_results.checksum)) {
			status = CPA_STATUS_FAIL;
			goto fail;
		}
		*c_len = dc_results.produced;
		QAT_STAT_INCR(decomp_total_out_bytes, *c_len);
	}
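
	/*
	 * Both the success and error paths fall through to the cleanup
	 * below: unmap every page that was kmap()'d and release the
	 * buffer lists and metadata.
	 */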
fail:
	if (status != CPA_STATUS_SUCCESS && status != CPA_STATUS_INCOMPRESSIBLE)
		QAT_STAT_BUMP(dc_fails);

	if (in_pages) {
		for (page_num = 0;
		    page_num < buf_list_src->numBuffers;
		    page_num++) {
			kunmap(in_pages[page_num]);
		}
		QAT_PHYS_CONTIG_FREE(in_pages);
	}

	if (out_pages) {
		for (page_num = 0; page_num < dst_pages; page_num++) {
			kunmap(out_pages[page_num]);
		}
		QAT_PHYS_CONTIG_FREE(out_pages);
	}

	if (add_pages) {
		for (page_num = 0;
		    page_num < buf_list_dst->numBuffers - dst_pages;
		    page_num++) {
			kunmap(add_pages[page_num]);
		}
		QAT_PHYS_CONTIG_FREE(add_pages);
	}

	QAT_PHYS_CONTIG_FREE(buffer_meta_src);
	QAT_PHYS_CONTIG_FREE(buffer_meta_dst);
	QAT_PHYS_CONTIG_FREE(buf_list_src);
	QAT_PHYS_CONTIG_FREE(buf_list_dst);

	return (status);
}

/*
 * Entry point for QAT accelerated compression / decompression.
 */
int
qat_compress(qat_compress_dir_t dir, char *src, int src_len,
    char *dst, int dst_len, size_t *c_len)
{
	int ret;
	size_t add_len = 0;
	void *add = NULL;
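
	/*
	 * For compression, hand the hardware a scratch buffer as large as
	 * the destination so incompressible data has somewhere to spill
	 * (see the comment above qat_compress_impl()).
	 */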
	if (dir == QAT_COMPRESS) {
		add_len = dst_len;
		add = zio_data_buf_alloc(add_len);
	}

	ret = qat_compress_impl(dir, src, src_len, dst,
	    dst_len, add, add_len, c_len);

	if (dir == QAT_COMPRESS)
		zio_data_buf_free(add, add_len);

	return (ret);
}

static int
param_set_qat_compress(const char *val, zfs_kernel_param_t *kp)
{
	int ret;
	int *pvalue = kp->arg;
	ret = param_set_int(val, kp);
	if (ret)
		return (ret);
	/*
	 * zfs_qat_compress_disable = 0: enable qat compress
	 * try to initialize qat instance if it has not been done
	 */
	if (*pvalue == 0 && !qat_dc_init_done) {
		ret = qat_dc_init();
		if (ret != 0) {
			zfs_qat_compress_disable = 1;
			return (ret);
		}
	}
	return (ret);
}

module_param_call(zfs_qat_compress_disable, param_set_qat_compress,
    param_get_int, &zfs_qat_compress_disable, 0644);
MODULE_PARM_DESC(zfs_qat_compress_disable, "Enable/Disable QAT compression");
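
/*
 * Usage note: with a typical build where this file is part of the zfs
 * module, the parameter is exposed at mode 0644 and can be toggled at
 * runtime, e.g.
 *   echo 1 > /sys/module/zfs/parameters/zfs_qat_compress_disable
 * or set at module load time with "zfs_qat_compress_disable=1".
 */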

#endif