// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/iommu.h>
#include <uapi/linux/idxd.h>
#include <linux/highmem.h>
#include <linux/sched/smt.h>
#include <crypto/internal/acompress.h>

#include "idxd.h"
#include "iaa_crypto.h"
#include "iaa_crypto_stats.h"

#ifdef pr_fmt
#undef pr_fmt
#endif

#define pr_fmt(fmt) "idxd: " IDXD_SUBDRIVER_NAME ": " fmt

#define IAA_ALG_PRIORITY 300
/* number of iaa instances probed */
static unsigned int nr_iaa;
static unsigned int nr_cpus;
static unsigned int nr_nodes;
static unsigned int nr_cpus_per_node;

/* Number of physical cpus sharing each iaa instance */
static unsigned int cpus_per_iaa;

static struct crypto_comp *deflate_generic_tfm;

/* Per-cpu lookup table for balanced wqs */
static struct wq_table_entry __percpu *wq_table;
static struct idxd_wq *wq_table_next_wq(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	if (++entry->cur_wq >= entry->n_wqs)
		entry->cur_wq = 0;

	if (!entry->wqs[entry->cur_wq])
		return NULL;

	pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
		 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
		 entry->wqs[entry->cur_wq]->id, cpu);

	return entry->wqs[entry->cur_wq];
}

static void wq_table_add(int cpu, struct idxd_wq *wq)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	if (WARN_ON(entry->n_wqs == entry->max_wqs))
		return;

	entry->wqs[entry->n_wqs++] = wq;

	pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
		 entry->wqs[entry->n_wqs - 1]->idxd->id,
		 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
}

static void wq_table_free_entry(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	kfree(entry->wqs);
	memset(entry, 0, sizeof(*entry));
}

static void wq_table_clear_entry(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	entry->n_wqs = 0;
	entry->cur_wq = 0;
	memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
}

LIST_HEAD(iaa_devices);
DEFINE_MUTEX(iaa_devices_lock);

/* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
static bool iaa_crypto_enabled;
static bool iaa_crypto_registered;

/* Verify results of IAA compress or not */
static bool iaa_verify_compress = true;

static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
{
	return sprintf(buf, "%d\n", iaa_verify_compress);
}

static ssize_t verify_compress_store(struct device_driver *driver,
				     const char *buf, size_t count)
{
	int ret = -EBUSY;

	mutex_lock(&iaa_devices_lock);

	if (iaa_crypto_enabled)
		goto out;

	ret = kstrtobool(buf, &iaa_verify_compress);
	if (ret)
		goto out;

	ret = count;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
}
static DRIVER_ATTR_RW(verify_compress);
/*
 * The iaa crypto driver supports three 'sync' methods determining how
 * compressions and decompressions are performed:
 *
 * - sync:      the compression or decompression completes before
 *              returning.  This is the mode used by the async crypto
 *              interface when the sync mode is set to 'sync' and by
 *              the sync crypto interface regardless of setting.
 *
 * - async:     the compression or decompression is submitted and returns
 *              immediately.  Completion interrupts are not used so
 *              the caller is responsible for polling the descriptor
 *              for completion.  This mode is applicable to only the
 *              async crypto interface and is ignored for anything
 *              else.
 *
 * - async_irq: the compression or decompression is submitted and
 *              returns immediately.  Completion interrupts are
 *              enabled so the caller can wait for the completion and
 *              yield to other threads.  When the compression or
 *              decompression completes, the completion is signaled
 *              and the caller awakened.  This mode is applicable to
 *              only the async crypto interface and is ignored for
 *              anything else.
 *
 * These modes can be set using the iaa_crypto sync_mode driver
 * attribute.
 */
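/*
 * Illustrative usage sketch (not code in this file): sync_mode and
 * verify_compress are driver attributes, so once the wq sub-driver is
 * registered they are written through sysfs.  The path below is an
 * assumption based on the conventional dsa bus layout documented for
 * iaa_crypto; check the running kernel's sysfs tree before relying on it:
 *
 *   # echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode
 *   # echo 0 > /sys/bus/dsa/drivers/crypto/verify_compress
 */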
/* Use async mode */
static bool async_mode;
/* Use interrupts */
static bool use_irq;

/**
 * set_iaa_sync_mode - Set IAA sync mode
 * @name: The name of the sync mode
 *
 * Make the IAA sync mode named @name the current sync mode used by
 * compression/decompression.
 */
static int set_iaa_sync_mode(const char *name)
{
	int ret = 0;

	if (sysfs_streq(name, "sync")) {
		async_mode = false;
		use_irq = false;
	} else if (sysfs_streq(name, "async")) {
		async_mode = true;
		use_irq = false;
	} else if (sysfs_streq(name, "async_irq")) {
		async_mode = true;
		use_irq = true;
	} else {
		ret = -EINVAL;
	}

	return ret;
}

static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
{
	int ret = 0;

	if (!async_mode && !use_irq)
		ret = sprintf(buf, "%s\n", "sync");
	else if (async_mode && !use_irq)
		ret = sprintf(buf, "%s\n", "async");
	else if (async_mode && use_irq)
		ret = sprintf(buf, "%s\n", "async_irq");

	return ret;
}

static ssize_t sync_mode_store(struct device_driver *driver,
			       const char *buf, size_t count)
{
	int ret = -EBUSY;

	mutex_lock(&iaa_devices_lock);

	if (iaa_crypto_enabled)
		goto out;

	ret = set_iaa_sync_mode(buf);
	if (ret == 0)
		ret = count;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
}
static DRIVER_ATTR_RW(sync_mode);
static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];

static int find_empty_iaa_compression_mode(void)
{
	int i = -EINVAL;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		if (iaa_compression_modes[i])
			continue;
		break;
	}

	return i;
}

static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
{
	struct iaa_compression_mode *mode;
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		mode = iaa_compression_modes[i];
		if (!mode)
			continue;

		if (!strcmp(mode->name, name)) {
			*idx = i;
			return iaa_compression_modes[i];
		}
	}

	return NULL;
}

static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
{
	kfree(mode->name);
	kfree(mode->ll_table);
	kfree(mode->d_table);

	kfree(mode);
}

/*
 * IAA Compression modes are defined by an ll_table and a d_table.
 * These tables are typically generated and captured using statistics
 * collected from running actual compress/decompress workloads.
 *
 * A module or other kernel code can add and remove compression modes
 * with a given name using the exported @add_iaa_compression_mode()
 * and @remove_iaa_compression_mode functions.
 *
 * When a new compression mode is added, the tables are saved in a
 * global compression mode list.  When IAA devices are added, a
 * per-IAA device dma mapping is created for each IAA device, for each
 * compression mode.  These are the tables used to do the actual
 * compression/decompression and are unmapped if/when the devices are
 * removed.  Currently, compression modes must be added before any
 * device is added, and removed after all devices have been removed.
 */
/**
 * remove_iaa_compression_mode - Remove an IAA compression mode
 * @name: The name the compression mode will be known as
 *
 * Remove the IAA compression mode named @name.
 */
void remove_iaa_compression_mode(const char *name)
{
	struct iaa_compression_mode *mode;
	int idx;

	mutex_lock(&iaa_devices_lock);

	if (!list_empty(&iaa_devices))
		goto out;

	mode = find_iaa_compression_mode(name, &idx);
	if (mode) {
		free_iaa_compression_mode(mode);
		iaa_compression_modes[idx] = NULL;
	}
out:
	mutex_unlock(&iaa_devices_lock);
}
EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);

/**
 * add_iaa_compression_mode - Add an IAA compression mode
 * @name: The name the compression mode will be known as
 * @ll_table: The ll table
 * @ll_table_size: The ll table size in bytes
 * @d_table: The d table
 * @d_table_size: The d table size in bytes
 * @init: Optional callback function to init the compression mode data
 * @free: Optional callback function to free the compression mode data
 *
 * Add a new IAA compression mode named @name.
 *
 * Returns 0 if successful, errcode otherwise.
 */
int add_iaa_compression_mode(const char *name,
			     const u32 *ll_table,
			     int ll_table_size,
			     const u32 *d_table,
			     int d_table_size,
			     iaa_dev_comp_init_fn_t init,
			     iaa_dev_comp_free_fn_t free)
{
	struct iaa_compression_mode *mode;
	int idx, ret = -ENOMEM;

	mutex_lock(&iaa_devices_lock);

	if (!list_empty(&iaa_devices)) {
		ret = -EBUSY;
		goto out;
	}

	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
	if (!mode)
		goto out;

	mode->name = kstrdup(name, GFP_KERNEL);
	if (!mode->name)
		goto free;

	mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL);
	if (!mode->ll_table)
		goto free;
	mode->ll_table_size = ll_table_size;

	mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL);
	if (!mode->d_table)
		goto free;
	mode->d_table_size = d_table_size;

	mode->init = init;
	mode->free = free;

	idx = find_empty_iaa_compression_mode();
	if (idx < 0)
		goto free;

	pr_debug("IAA compression mode %s added at idx %d\n",
		 mode->name, idx);

	iaa_compression_modes[idx] = mode;

	ret = 0;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
free:
	free_iaa_compression_mode(mode);
	goto out;
}
EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
static struct iaa_device_compression_mode *
get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
{
	return iaa_device->compression_modes[idx];
}

static void free_device_compression_mode(struct iaa_device *iaa_device,
					 struct iaa_device_compression_mode *device_mode)
{
	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
	struct device *dev = &iaa_device->idxd->pdev->dev;

	kfree(device_mode->name);

	if (device_mode->aecs_comp_table)
		dma_free_coherent(dev, size, device_mode->aecs_comp_table,
				  device_mode->aecs_comp_table_dma_addr);
	kfree(device_mode);
}

#define IDXD_OP_FLAG_AECS_RW_TGLS 0x400000
#define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
#define IAX_AECS_COMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
#define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
#define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
			   IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
			   IDXD_OP_FLAG_AECS_RW_TGLS)

static int check_completion(struct device *dev,
			    struct iax_completion_record *comp,
			    bool compress,
			    bool only_once);
static int init_device_compression_mode(struct iaa_device *iaa_device,
					struct iaa_compression_mode *mode,
					int idx, struct idxd_wq *wq)
{
	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
	struct device *dev = &iaa_device->idxd->pdev->dev;
	struct iaa_device_compression_mode *device_mode;
	int ret = -ENOMEM;

	device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
	if (!device_mode)
		return -ENOMEM;

	device_mode->name = kstrdup(mode->name, GFP_KERNEL);
	if (!device_mode->name)
		goto free;

	device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
							  &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
	if (!device_mode->aecs_comp_table)
		goto free;

	/* Add Huffman table to aecs */
	memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
	memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
	memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);

	if (mode->init) {
		ret = mode->init(device_mode);
		if (ret)
			goto free;
	}

	/* mode index should match iaa_compression_modes idx */
	iaa_device->compression_modes[idx] = device_mode;

	pr_debug("IAA %s compression mode initialized for iaa device %d\n",
		 mode->name, iaa_device->idxd->id);

	ret = 0;
out:
	return ret;
free:
	pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
		 mode->name, iaa_device->idxd->id);

	free_device_compression_mode(iaa_device, device_mode);
	goto out;
}

static int init_device_compression_modes(struct iaa_device *iaa_device,
					 struct idxd_wq *wq)
{
	struct iaa_compression_mode *mode;
	int i, ret = 0;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		mode = iaa_compression_modes[i];
		if (!mode)
			continue;

		ret = init_device_compression_mode(iaa_device, mode, i, wq);
		if (ret)
			break;
	}

	return ret;
}

static void remove_device_compression_modes(struct iaa_device *iaa_device)
{
	struct iaa_device_compression_mode *device_mode;
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		device_mode = iaa_device->compression_modes[i];
		if (!device_mode)
			continue;

		if (iaa_compression_modes[i]->free)
			iaa_compression_modes[i]->free(device_mode);
		free_device_compression_mode(iaa_device, device_mode);
		iaa_device->compression_modes[i] = NULL;
	}
}
static struct iaa_device *iaa_device_alloc(void)
{
	struct iaa_device *iaa_device;

	iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
	if (!iaa_device)
		return NULL;

	INIT_LIST_HEAD(&iaa_device->wqs);

	return iaa_device;
}

static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
{
	struct iaa_wq *iaa_wq;

	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
		if (iaa_wq->wq == wq)
			return true;
	}

	return false;
}

static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
{
	struct iaa_device *iaa_device;

	iaa_device = iaa_device_alloc();
	if (!iaa_device)
		return NULL;

	iaa_device->idxd = idxd;

	list_add_tail(&iaa_device->list, &iaa_devices);

	nr_iaa++;

	return iaa_device;
}

static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
{
	int ret = 0;

	ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
	if (ret)
		return ret;

	return ret;
}

static void del_iaa_device(struct iaa_device *iaa_device)
{
	list_del(&iaa_device->list);

	nr_iaa--;
}
static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
		      struct iaa_wq **new_wq)
{
	struct idxd_device *idxd = iaa_device->idxd;
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iaa_wq *iaa_wq;

	iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
	if (!iaa_wq)
		return -ENOMEM;

	iaa_wq->wq = wq;
	iaa_wq->iaa_device = iaa_device;
	idxd_wq_set_private(wq, iaa_wq);

	list_add_tail(&iaa_wq->list, &iaa_device->wqs);

	iaa_device->n_wq++;

	if (new_wq)
		*new_wq = iaa_wq;

	dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
		wq->id, iaa_device->idxd->id, iaa_device->n_wq);

	return 0;
}

static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
{
	struct idxd_device *idxd = iaa_device->idxd;
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iaa_wq *iaa_wq;

	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
		if (iaa_wq->wq == wq) {
			list_del(&iaa_wq->list);
			iaa_device->n_wq--;

			dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
				wq->id, iaa_device->idxd->id,
				iaa_device->n_wq, nr_iaa);

			if (iaa_device->n_wq == 0)
				del_iaa_device(iaa_device);
			break;
		}
	}
}

static void clear_wq_table(void)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		wq_table_clear_entry(cpu);

	pr_debug("cleared wq table\n");
}
static void free_iaa_device(struct iaa_device *iaa_device)
{
	if (!iaa_device)
		return;

	remove_device_compression_modes(iaa_device);
	kfree(iaa_device);
}

static void __free_iaa_wq(struct iaa_wq *iaa_wq)
{
	struct iaa_device *iaa_device;

	if (!iaa_wq)
		return;

	iaa_device = iaa_wq->iaa_device;
	if (iaa_device->n_wq == 0)
		free_iaa_device(iaa_wq->iaa_device);
}

static void free_iaa_wq(struct iaa_wq *iaa_wq)
{
	struct idxd_wq *wq;

	__free_iaa_wq(iaa_wq);

	wq = iaa_wq->wq;

	kfree(iaa_wq);

	idxd_wq_set_private(wq, NULL);
}

static int iaa_wq_get(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	int ret = 0;

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (iaa_wq && !iaa_wq->remove) {
		iaa_wq->ref++;
	} else {
		ret = -ENODEV;
	}
	spin_unlock(&idxd->dev_lock);

	return ret;
}

static int iaa_wq_put(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	bool free = false;
	int ret = 0;

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (iaa_wq) {
		iaa_wq->ref--;
		if (iaa_wq->ref == 0 && iaa_wq->remove) {
			idxd_wq_set_private(wq, NULL);
			free = true;
		}
	} else {
		ret = -ENODEV;
	}
	spin_unlock(&idxd->dev_lock);
	if (free) {
		__free_iaa_wq(iaa_wq);
		kfree(iaa_wq);
	}

	return ret;
}

static void free_wq_table(void)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		wq_table_free_entry(cpu);

	free_percpu(wq_table);

	pr_debug("freed wq table\n");
}
static int alloc_wq_table(int max_wqs)
{
	struct wq_table_entry *entry;
	int cpu;

	wq_table = alloc_percpu(struct wq_table_entry);
	if (!wq_table)
		return -ENOMEM;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		entry = per_cpu_ptr(wq_table, cpu);
		entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL);
		if (!entry->wqs) {
			free_wq_table();
			return -ENOMEM;
		}

		entry->max_wqs = max_wqs;
	}

	pr_debug("initialized wq table\n");

	return 0;
}

static int save_iaa_wq(struct idxd_wq *wq)
{
	struct iaa_device *iaa_device, *found = NULL;
	struct idxd_device *idxd;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		if (iaa_device->idxd == wq->idxd) {
			idxd = iaa_device->idxd;
			pdev = idxd->pdev;
			dev = &pdev->dev;
			/*
			 * Check to see that we don't already have this wq.
			 * Shouldn't happen but we don't control probing.
			 */
			if (iaa_has_wq(iaa_device, wq)) {
				dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
					iaa_device);
				goto out;
			}

			found = iaa_device;

			ret = add_iaa_wq(iaa_device, wq, NULL);
			if (ret)
				goto out;

			break;
		}
	}

	if (!found) {
		struct iaa_device *new_device;
		struct iaa_wq *new_wq;

		new_device = add_iaa_device(wq->idxd);
		if (!new_device) {
			ret = -ENOMEM;
			goto out;
		}

		ret = add_iaa_wq(new_device, wq, &new_wq);
		if (ret) {
			del_iaa_device(new_device);
			free_iaa_device(new_device);
			goto out;
		}

		ret = init_iaa_device(new_device, new_wq);
		if (ret) {
			del_iaa_wq(new_device, new_wq->wq);
			del_iaa_device(new_device);
			free_iaa_wq(new_wq);
			goto out;
		}
	}

	if (WARN_ON(nr_iaa == 0))
		return -EINVAL;

	cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
	if (!cpus_per_iaa)
		cpus_per_iaa = 1;
out:
	return 0;
}

static void remove_iaa_wq(struct idxd_wq *wq)
{
	struct iaa_device *iaa_device;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		if (iaa_has_wq(iaa_device, wq)) {
			del_iaa_wq(iaa_device, wq);
			break;
		}
	}

	if (nr_iaa) {
		cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
		if (!cpus_per_iaa)
			cpus_per_iaa = 1;
	} else {
		cpus_per_iaa = 1;
	}
}
static int wq_table_add_wqs(int iaa, int cpu)
{
	struct iaa_device *iaa_device, *found_device = NULL;
	int ret = 0, cur_iaa = 0, n_wqs_added = 0;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		idxd = iaa_device->idxd;
		pdev = idxd->pdev;
		dev = &pdev->dev;

		if (cur_iaa != iaa) {
			cur_iaa++;
			continue;
		}

		found_device = iaa_device;
		dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
			found_device->idxd->id, cur_iaa);
		break;
	}

	if (!found_device) {
		found_device = list_first_entry_or_null(&iaa_devices,
							struct iaa_device, list);
		if (!found_device) {
			pr_debug("couldn't find any iaa devices with wqs!\n");
			ret = -EINVAL;
			goto out;
		}
		cur_iaa = 0;

		idxd = found_device->idxd;
		pdev = idxd->pdev;
		dev = &pdev->dev;
		dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
			found_device->idxd->id, cur_iaa);
	}

	list_for_each_entry(iaa_wq, &found_device->wqs, list) {
		wq_table_add(cpu, iaa_wq->wq);
		pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
			 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
		n_wqs_added++;
	}

	if (!n_wqs_added) {
		pr_debug("couldn't find any iaa wqs!\n");
		ret = -EINVAL;
		goto out;
	}
out:
	return ret;
}
/*
 * Rebalance the wq table so that given a cpu, it's easy to find the
 * closest IAA instance.  The idea is to try to choose the most
 * appropriate IAA instance for a caller and spread available
 * workqueues around to clients.
 */
static void rebalance_wq_table(void)
{
	const struct cpumask *node_cpus;
	int node, cpu, iaa = -1;

	if (nr_iaa == 0)
		return;

	pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
		 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);

	clear_wq_table();

	if (nr_iaa == 1) {
		for (cpu = 0; cpu < nr_cpus; cpu++) {
			if (WARN_ON(wq_table_add_wqs(0, cpu))) {
				pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
				return;
			}
		}

		return;
	}

	for_each_node_with_cpus(node) {
		node_cpus = cpumask_of_node(node);

		for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) {
			int node_cpu = cpumask_nth(cpu, node_cpus);

			if (WARN_ON(node_cpu >= nr_cpu_ids)) {
				pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
				return;
			}

			if ((cpu % cpus_per_iaa) == 0)
				iaa++;

			if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
				pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
				return;
			}
		}
	}
}
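/*
 * Worked example of the spread above (numbers purely illustrative): with
 * nr_nodes = 2, nr_cpus_per_node = 32 and nr_iaa = 4, save_iaa_wq() sets
 * cpus_per_iaa = (2 * 32) / 4 = 16, so within each node the first 16 cpus
 * are pointed at the wqs of one IAA instance and the next 16 at the next
 * instance, since every (cpu % cpus_per_iaa) == 0 boundary advances iaa.
 */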
static inline int check_completion(struct device *dev,
				   struct iax_completion_record *comp,
				   bool compress,
				   bool only_once)
{
	char *op_str = compress ? "compress" : "decompress";
	int ret = 0;

	while (!comp->status) {
		if (only_once)
			return -EAGAIN;
		cpu_relax();
	}

	if (comp->status != IAX_COMP_SUCCESS) {
		if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
			ret = -ETIMEDOUT;
			dev_dbg(dev, "%s timed out, size=0x%x\n",
				op_str, comp->output_size);
			update_completion_timeout_errs();
			goto out;
		}

		if (comp->status == IAA_ANALYTICS_ERROR &&
		    comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
			ret = -E2BIG;
			dev_dbg(dev, "compressed > uncompressed size,"
				" not compressing, size=0x%x\n",
				comp->output_size);
			update_completion_comp_buf_overflow_errs();
			goto out;
		}

		if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
			ret = -EOVERFLOW;
			goto out;
		}

		ret = -EINVAL;
		dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
			op_str, comp->status, comp->error_code, comp->output_size);
		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
		update_completion_einval_errs();
	}
out:
	return ret;
}

static int deflate_generic_decompress(struct acomp_req *req)
{
	void *src, *dst;
	int ret;

	src = kmap_local_page(sg_page(req->src)) + req->src->offset;
	dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;

	ret = crypto_comp_decompress(deflate_generic_tfm,
				     src, req->slen, dst, &req->dlen);

	kunmap_local(src);
	kunmap_local(dst);

	update_total_sw_decomp_calls();

	return ret;
}
static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
				struct acomp_req *req,
				dma_addr_t *src_addr, dma_addr_t *dst_addr);

static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
			       struct idxd_wq *wq,
			       dma_addr_t src_addr, unsigned int slen,
			       dma_addr_t dst_addr, unsigned int *dlen,
			       u32 compression_crc);
static void iaa_desc_complete(struct idxd_desc *idxd_desc,
			      enum idxd_complete_type comp_type,
			      bool free_desc, void *__ctx,
			      u32 *status)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *compression_ctx;
	struct crypto_ctx *ctx = __ctx;
	struct iaa_device *iaa_device;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret, err = 0;

	compression_ctx = crypto_tfm_ctx(ctx->tfm);

	iaa_wq = idxd_wq_get_private(idxd_desc->wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device,
								  compression_ctx->mode);
	dev_dbg(dev, "%s: compression mode %s,"
		" ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
		active_compression_mode->name,
		ctx->src_addr, ctx->dst_addr);

	ret = check_completion(dev, idxd_desc->iax_completion,
			       ctx->compress, false);
	if (ret) {
		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
		if (!ctx->compress &&
		    idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
			pr_warn("%s: falling back to deflate-generic decompress, "
				"analytics error code %x\n", __func__,
				idxd_desc->iax_completion->error_code);
			ret = deflate_generic_decompress(ctx->req);
			if (ret) {
				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
					__func__, ret);
				err = -EIO;
				goto err;
			}
		} else {
			err = -EIO;
			goto err;
		}
	} else {
		ctx->req->dlen = idxd_desc->iax_completion->output_size;
	}

	/* Update stats */
	if (ctx->compress) {
		update_total_comp_bytes_out(ctx->req->dlen);
		update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
	} else {
		update_total_decomp_bytes_in(ctx->req->slen);
		update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen);
	}

	if (ctx->compress && compression_ctx->verify_compress) {
		dma_addr_t src_addr, dst_addr;
		u32 compression_crc;

		compression_crc = idxd_desc->iax_completion->crc;

		ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
		if (ret) {
			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
			err = -EIO;
			goto out;
		}

		ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
					  ctx->req->slen, dst_addr, &ctx->req->dlen,
					  compression_crc);
		if (ret) {
			dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
			err = -EIO;
		}

		dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
		dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);

		goto out;
	}
err:
	dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
	dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
out:
	if (ret != 0)
		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);

	if (ctx->req->base.complete)
		acomp_request_complete(ctx->req, err);

	if (free_desc)
		idxd_free_desc(idxd_desc->wq, idxd_desc);
	iaa_wq_put(idxd_desc->wq);
}
static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
			struct idxd_wq *wq,
			dma_addr_t src_addr, unsigned int slen,
			dma_addr_t dst_addr, unsigned int *dlen,
			u32 *compression_crc,
			bool disable_async)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
		IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_COMPRESS;
	desc->compr_flags = IAA_COMP_FLAGS;
	desc->priv = 0;

	desc->src1_addr = (u64)src_addr;
	desc->src1_size = slen;
	desc->dst_addr = (u64)dst_addr;
	desc->max_dst_size = *dlen;
	desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
	desc->src2_size = sizeof(struct aecs_comp_table_record);
	desc->completion_addr = idxd_desc->compl_dma;

	if (ctx->use_irq && !disable_async) {
		desc->flags |= IDXD_OP_FLAG_RCI;

		idxd_desc->crypto.req = req;
		idxd_desc->crypto.tfm = tfm;
		idxd_desc->crypto.src_addr = src_addr;
		idxd_desc->crypto.dst_addr = dst_addr;
		idxd_desc->crypto.compress = true;

		dev_dbg(dev, "%s use_async_irq: compression mode %s,"
			" src_addr %llx, dst_addr %llx\n", __func__,
			active_compression_mode->name,
			src_addr, dst_addr);
	} else if (ctx->async_mode && !disable_async)
		req->base.data = idxd_desc;

	dev_dbg(dev, "%s: compression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
		goto err;
	}

	/* Update stats */
	update_total_comp_calls();
	update_wq_comp_calls(wq);

	if (ctx->async_mode && !disable_async) {
		ret = -EINPROGRESS;
		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
		goto out;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, true, false);
	if (ret) {
		dev_dbg(dev, "check_completion failed ret=%d\n", ret);
		goto err;
	}

	*dlen = idxd_desc->iax_completion->output_size;

	/* Update stats */
	update_total_comp_bytes_out(*dlen);
	update_wq_comp_bytes(wq, *dlen);

	*compression_crc = idxd_desc->iax_completion->crc;

	if (!ctx->async_mode || disable_async)
		idxd_free_desc(wq, idxd_desc);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);

	goto out;
}
static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
				struct acomp_req *req,
				dma_addr_t *src_addr, dma_addr_t *dst_addr)
{
	int ret = 0;
	int nr_sgs;

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	*src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
		goto out;
	}
	*dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));
out:
	return ret;
}
static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
			       struct idxd_wq *wq,
			       dma_addr_t src_addr, unsigned int slen,
			       dma_addr_t dst_addr, unsigned int *dlen,
			       u32 compression_crc)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa compress failed: ret=%ld\n",
			PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	/* Verify (optional) - decompress and check crc, suppress dest write */

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_DECOMPRESS;
	desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
	desc->priv = 0;

	desc->src1_addr = (u64)dst_addr;
	desc->src1_size = *dlen;
	desc->dst_addr = (u64)src_addr;
	desc->max_dst_size = slen;
	desc->completion_addr = idxd_desc->compl_dma;

	dev_dbg(dev, "(verify) compression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n",
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
		goto err;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
	if (ret) {
		dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
		goto err;
	}

	if (compression_crc != idxd_desc->iax_completion->crc) {
		ret = -EINVAL;
		dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
			" comp=0x%x, decomp=0x%x\n", compression_crc,
			idxd_desc->iax_completion->crc);
		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
			       8, 1, idxd_desc->iax_completion, 64, 0);
		goto err;
	}

	idxd_free_desc(wq, idxd_desc);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);

	goto out;
}
static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
			  struct idxd_wq *wq,
			  dma_addr_t src_addr, unsigned int slen,
			  dma_addr_t dst_addr, unsigned int *dlen,
			  bool disable_async)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
			PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_DECOMPRESS;
	desc->max_dst_size = PAGE_SIZE;
	desc->decompr_flags = IAA_DECOMP_FLAGS;
	desc->priv = 0;

	desc->src1_addr = (u64)src_addr;
	desc->dst_addr = (u64)dst_addr;
	desc->max_dst_size = *dlen;
	desc->src1_size = slen;
	desc->completion_addr = idxd_desc->compl_dma;

	if (ctx->use_irq && !disable_async) {
		desc->flags |= IDXD_OP_FLAG_RCI;

		idxd_desc->crypto.req = req;
		idxd_desc->crypto.tfm = tfm;
		idxd_desc->crypto.src_addr = src_addr;
		idxd_desc->crypto.dst_addr = dst_addr;
		idxd_desc->crypto.compress = false;

		dev_dbg(dev, "%s: use_async_irq compression mode %s,"
			" src_addr %llx, dst_addr %llx\n", __func__,
			active_compression_mode->name,
			src_addr, dst_addr);
	} else if (ctx->async_mode && !disable_async)
		req->base.data = idxd_desc;

	dev_dbg(dev, "%s: decompression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
		goto err;
	}

	/* Update stats */
	update_total_decomp_calls();
	update_wq_decomp_calls(wq);

	if (ctx->async_mode && !disable_async) {
		ret = -EINPROGRESS;
		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
		goto out;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
	if (ret) {
		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
		if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
			pr_warn("%s: falling back to deflate-generic decompress, "
				"analytics error code %x\n", __func__,
				idxd_desc->iax_completion->error_code);
			ret = deflate_generic_decompress(req);
			if (ret) {
				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
					__func__, ret);
				goto err;
			}
		} else {
			goto err;
		}
	} else {
		req->dlen = idxd_desc->iax_completion->output_size;
	}

	*dlen = req->dlen;

	if (!ctx->async_mode || disable_async)
		idxd_free_desc(wq, idxd_desc);

	/* Update stats */
	update_total_decomp_bytes_in(slen);
	update_wq_decomp_bytes(wq, slen);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);

	goto out;
}
static int iaa_comp_acompress(struct acomp_req *req)
{
	struct iaa_compression_ctx *compression_ctx;
	struct crypto_tfm *tfm = req->base.tfm;
	dma_addr_t src_addr, dst_addr;
	bool disable_async = false;
	int nr_sgs, cpu, ret = 0;
	struct iaa_wq *iaa_wq;
	u32 compression_crc;
	struct idxd_wq *wq;
	struct device *dev;
	int order = -1;

	compression_ctx = crypto_tfm_ctx(tfm);

	if (!iaa_crypto_enabled) {
		pr_debug("iaa_crypto disabled, not compressing\n");
		return -ENODEV;
	}

	if (!req->src || !req->slen) {
		pr_debug("invalid src, not compressing\n");
		return -EINVAL;
	}

	cpu = get_cpu();
	wq = wq_table_next_wq(cpu);
	put_cpu();
	if (!wq) {
		pr_debug("no wq configured for cpu=%d\n", cpu);
		return -ENODEV;
	}

	ret = iaa_wq_get(wq);
	if (ret) {
		pr_debug("no wq available for cpu=%d\n", cpu);
		return -ENODEV;
	}

	iaa_wq = idxd_wq_get_private(wq);

	if (!req->dst) {
		gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;

		/* incompressible data will always be < 2 * slen */
		req->dlen = 2 * req->slen;
		order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
		req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
		if (!req->dst) {
			ret = -ENOMEM;
			order = -1;
			goto out;
		}
		disable_async = true;
	}

	dev = &wq->idxd->pdev->dev;

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto err_map_dst;
	}
	dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));

	ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
			   &req->dlen, &compression_crc, disable_async);
	if (ret == -EINPROGRESS)
		return ret;

	if (!ret && compression_ctx->verify_compress) {
		ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
		if (ret) {
			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
			goto out;
		}

		ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
					  dst_addr, &req->dlen, compression_crc);
		if (ret)
			dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);

		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);

		goto out;
	}

	if (ret)
		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
err_map_dst:
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
	iaa_wq_put(wq);

	if (order >= 0)
		sgl_free_order(req->dst, order);

	return ret;
}
static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
{
	gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
		GFP_KERNEL : GFP_ATOMIC;
	struct crypto_tfm *tfm = req->base.tfm;
	dma_addr_t src_addr, dst_addr;
	int nr_sgs, cpu, ret = 0;
	struct iaa_wq *iaa_wq;
	struct device *dev;
	struct idxd_wq *wq;
	int order = -1;

	cpu = get_cpu();
	wq = wq_table_next_wq(cpu);
	put_cpu();
	if (!wq) {
		pr_debug("no wq configured for cpu=%d\n", cpu);
		return -ENODEV;
	}

	ret = iaa_wq_get(wq);
	if (ret) {
		pr_debug("no wq available for cpu=%d\n", cpu);
		return -ENODEV;
	}

	iaa_wq = idxd_wq_get_private(wq);

	dev = &wq->idxd->pdev->dev;

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	req->dlen = 4 * req->slen; /* start with ~avg comp ratio */
alloc_dest:
	order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
	req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
	if (!req->dst) {
		ret = -ENOMEM;
		order = -1;
		goto out;
	}

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto err_map_dst;
	}

	dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));
	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
			     dst_addr, &req->dlen, true);
	if (ret == -EOVERFLOW) {
		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
		req->dlen *= 2;
		if (req->dlen > CRYPTO_ACOMP_DST_MAX)
			goto err_map_dst;
		goto alloc_dest;
	}

	if (ret != 0)
		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
err_map_dst:
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
	iaa_wq_put(wq);

	if (order >= 0)
		sgl_free_order(req->dst, order);

	return ret;
}
static int iaa_comp_adecompress(struct acomp_req *req)
{
	struct crypto_tfm *tfm = req->base.tfm;
	dma_addr_t src_addr, dst_addr;
	int nr_sgs, cpu, ret = 0;
	struct iaa_wq *iaa_wq;
	struct device *dev;
	struct idxd_wq *wq;

	if (!iaa_crypto_enabled) {
		pr_debug("iaa_crypto disabled, not decompressing\n");
		return -ENODEV;
	}

	if (!req->src || !req->slen) {
		pr_debug("invalid src, not decompressing\n");
		return -EINVAL;
	}

	if (!req->dst)
		return iaa_comp_adecompress_alloc_dest(req);

	cpu = get_cpu();
	wq = wq_table_next_wq(cpu);
	put_cpu();
	if (!wq) {
		pr_debug("no wq configured for cpu=%d\n", cpu);
		return -ENODEV;
	}

	ret = iaa_wq_get(wq);
	if (ret) {
		pr_debug("no wq available for cpu=%d\n", cpu);
		return -ENODEV;
	}

	iaa_wq = idxd_wq_get_private(wq);

	dev = &wq->idxd->pdev->dev;

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto err_map_dst;
	}
	dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));

	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
			     dst_addr, &req->dlen, false);
	if (ret == -EINPROGRESS)
		return ret;

	if (ret != 0)
		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
err_map_dst:
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
	iaa_wq_put(wq);

	return ret;
}
static void compression_ctx_init(struct iaa_compression_ctx *ctx)
{
	ctx->verify_compress = iaa_verify_compress;
	ctx->async_mode = async_mode;
	ctx->use_irq = use_irq;
}

static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
{
	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);

	compression_ctx_init(ctx);

	ctx->mode = IAA_MODE_FIXED;

	return 0;
}

static void dst_free(struct scatterlist *sgl)
{
	/*
	 * Called for req->dst = NULL cases but we free elsewhere
	 * using sgl_free_order().
	 */
}

static struct acomp_alg iaa_acomp_fixed_deflate = {
	.init			= iaa_comp_init_fixed,
	.compress		= iaa_comp_acompress,
	.decompress		= iaa_comp_adecompress,
	.dst_free		= dst_free,
	.base			= {
		.cra_name		= "deflate",
		.cra_driver_name	= "deflate-iaa",
		.cra_flags		= CRYPTO_ALG_ASYNC,
		.cra_ctxsize		= sizeof(struct iaa_compression_ctx),
		.cra_module		= THIS_MODULE,
		.cra_priority		= IAA_ALG_PRIORITY,
	}
};
static int iaa_register_compression_device(void)
{
	int ret;

	ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
	if (ret) {
		pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
		goto out;
	}

	iaa_crypto_registered = true;
out:
	return ret;
}

static int iaa_unregister_compression_device(void)
{
	if (iaa_crypto_registered)
		crypto_unregister_acomp(&iaa_acomp_fixed_deflate);

	return 0;
}
static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct idxd_driver_data *data = idxd->data;
	struct device *dev = &idxd_dev->conf_dev;
	bool first_wq = false;
	int ret = 0;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -ENXIO;

	if (data->type != IDXD_TYPE_IAX)
		return -ENODEV;

	mutex_lock(&wq->wq_lock);

	if (idxd_wq_get_private(wq)) {
		mutex_unlock(&wq->wq_lock);
		return -EBUSY;
	}

	if (!idxd_wq_driver_name_match(wq, dev)) {
		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
			idxd->id, wq->id, wq->driver_name, dev->driver->name);
		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
		ret = -ENODEV;
		goto err;
	}

	wq->type = IDXD_WQT_KERNEL;

	ret = idxd_drv_enable_wq(wq);
	if (ret < 0) {
		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
			idxd->id, wq->id, ret);
		ret = -ENXIO;
		goto err;
	}

	mutex_lock(&iaa_devices_lock);

	if (list_empty(&iaa_devices)) {
		ret = alloc_wq_table(wq->idxd->max_wqs);
		if (ret)
			goto err_alloc;
		first_wq = true;
	}

	ret = save_iaa_wq(wq);
	if (ret)
		goto err_save;

	rebalance_wq_table();

	if (first_wq) {
		iaa_crypto_enabled = true;
		ret = iaa_register_compression_device();
		if (ret != 0) {
			iaa_crypto_enabled = false;
			dev_dbg(dev, "IAA compression device registration failed\n");
			goto err_register;
		}
		try_module_get(THIS_MODULE);

		pr_info("iaa_crypto now ENABLED\n");
	}

	mutex_unlock(&iaa_devices_lock);
out:
	mutex_unlock(&wq->wq_lock);

	return ret;

err_register:
	remove_iaa_wq(wq);
	free_iaa_wq(idxd_wq_get_private(wq));
err_save:
	if (first_wq)
		free_wq_table();
err_alloc:
	mutex_unlock(&iaa_devices_lock);
	idxd_drv_disable_wq(wq);
err:
	wq->type = IDXD_WQT_NONE;

	goto out;
}
static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	bool free = false;

	idxd_wq_quiesce(wq);

	mutex_lock(&wq->wq_lock);
	mutex_lock(&iaa_devices_lock);

	remove_iaa_wq(wq);

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (!iaa_wq) {
		spin_unlock(&idxd->dev_lock);
		pr_err("%s: no iaa_wq available to remove\n", __func__);
		goto out;
	}

	if (iaa_wq->ref) {
		iaa_wq->remove = true;
	} else {
		wq = iaa_wq->wq;
		idxd_wq_set_private(wq, NULL);
		free = true;
	}
	spin_unlock(&idxd->dev_lock);
	if (free) {
		__free_iaa_wq(iaa_wq);
		kfree(iaa_wq);
	}

	idxd_drv_disable_wq(wq);
	rebalance_wq_table();

	if (nr_iaa == 0) {
		iaa_crypto_enabled = false;
		free_wq_table();
		module_put(THIS_MODULE);

		pr_info("iaa_crypto now DISABLED\n");
	}
out:
	mutex_unlock(&iaa_devices_lock);
	mutex_unlock(&wq->wq_lock);
}
static enum idxd_dev_type dev_types[] = {
	IDXD_DEV_WQ,
	IDXD_DEV_NONE,
};

static struct idxd_device_driver iaa_crypto_driver = {
	.probe = iaa_crypto_probe,
	.remove = iaa_crypto_remove,
	.name = IDXD_SUBDRIVER_NAME,
	.type = dev_types,
	.desc_complete = iaa_desc_complete,
};
static int __init iaa_crypto_init_module(void)
{
	int ret = 0;
	int node;

	nr_cpus = num_possible_cpus();
	for_each_node_with_cpus(node)
		nr_nodes++;
	if (!nr_nodes) {
		pr_err("IAA couldn't find any nodes with cpus\n");
		return -ENODEV;
	}
	nr_cpus_per_node = nr_cpus / nr_nodes;

	if (crypto_has_comp("deflate-generic", 0, 0))
		deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);

	if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
		pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
		       "deflate-generic", PTR_ERR(deflate_generic_tfm));
		return -ENOMEM;
	}

	ret = iaa_aecs_init_fixed();
	if (ret < 0) {
		pr_debug("IAA fixed compression mode init failed\n");
		goto err_aecs_init;
	}

	ret = idxd_driver_register(&iaa_crypto_driver);
	if (ret) {
		pr_debug("IAA wq sub-driver registration failed\n");
		goto err_driver_reg;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_verify_compress);
	if (ret) {
		pr_debug("IAA verify_compress attr creation failed\n");
		goto err_verify_attr_create;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_sync_mode);
	if (ret) {
		pr_debug("IAA sync mode attr creation failed\n");
		goto err_sync_attr_create;
	}

	if (iaa_crypto_debugfs_init())
		pr_warn("debugfs init failed, stats not available\n");

	pr_debug("initialized\n");
out:
	return ret;

err_sync_attr_create:
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
err_verify_attr_create:
	idxd_driver_unregister(&iaa_crypto_driver);
err_driver_reg:
	iaa_aecs_cleanup_fixed();
err_aecs_init:
	crypto_free_comp(deflate_generic_tfm);

	goto out;
}

static void __exit iaa_crypto_cleanup_module(void)
{
	if (iaa_unregister_compression_device())
		pr_debug("IAA compression device unregister failed\n");

	iaa_crypto_debugfs_cleanup();
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_sync_mode);
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
	idxd_driver_unregister(&iaa_crypto_driver);
	iaa_aecs_cleanup_fixed();
	crypto_free_comp(deflate_generic_tfm);

	pr_debug("cleaned up\n");
}

MODULE_IMPORT_NS(IDXD);
MODULE_LICENSE("GPL");
MODULE_ALIAS_IDXD_DEVICE(0);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");

module_init(iaa_crypto_init_module);
module_exit(iaa_crypto_cleanup_module);