// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/iommu.h>
#include <uapi/linux/idxd.h>
#include <linux/highmem.h>
#include <linux/sched/smt.h>
#include <crypto/internal/acompress.h>

#include "idxd.h"
#include "iaa_crypto.h"
#include "iaa_crypto_stats.h"

#ifdef pr_fmt
#undef pr_fmt
#endif

#define pr_fmt(fmt)			"idxd: " IDXD_SUBDRIVER_NAME ": " fmt

#define IAA_ALG_PRIORITY		300

/* number of iaa instances probed */
static unsigned int nr_iaa;
static unsigned int nr_cpus;
static unsigned int nr_nodes;
static unsigned int nr_cpus_per_node;

/* Number of physical cpus sharing each iaa instance */
static unsigned int cpus_per_iaa;

static struct crypto_comp *deflate_generic_tfm;

/* Per-cpu lookup table for balanced wqs */
static struct wq_table_entry __percpu *wq_table;

static struct idxd_wq *wq_table_next_wq(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	if (++entry->cur_wq >= entry->n_wqs)
		entry->cur_wq = 0;

	if (!entry->wqs[entry->cur_wq])
		return NULL;

	pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
		 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
		 entry->wqs[entry->cur_wq]->id, cpu);

	return entry->wqs[entry->cur_wq];
}

static void wq_table_add(int cpu, struct idxd_wq *wq)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	if (WARN_ON(entry->n_wqs == entry->max_wqs))
		return;

	entry->wqs[entry->n_wqs++] = wq;

	pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
		 entry->wqs[entry->n_wqs - 1]->idxd->id,
		 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
}

static void wq_table_free_entry(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	kfree(entry->wqs);
	memset(entry, 0, sizeof(*entry));
}

static void wq_table_clear_entry(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	entry->n_wqs = 0;
	entry->cur_wq = 0;
	memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
}

LIST_HEAD(iaa_devices);
DEFINE_MUTEX(iaa_devices_lock);

/* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
static bool iaa_crypto_enabled;
static bool iaa_crypto_registered;

/* Verify results of IAA compress or not */
static bool iaa_verify_compress = true;

static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
{
	return sprintf(buf, "%d\n", iaa_verify_compress);
}

static ssize_t verify_compress_store(struct device_driver *driver,
				     const char *buf, size_t count)
{
	int ret = -EBUSY;

	mutex_lock(&iaa_devices_lock);

	if (iaa_crypto_enabled)
		goto out;

	ret = kstrtobool(buf, &iaa_verify_compress);
	if (ret)
		goto out;

	ret = count;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
}

static DRIVER_ATTR_RW(verify_compress);

/*
 * The iaa crypto driver supports three 'sync' methods determining how
 * compressions and decompressions are performed:
 *
 * - sync:      the compression or decompression completes before
 *              returning.  This is the mode used by the async crypto
 *              interface when the sync mode is set to 'sync' and by
 *              the sync crypto interface regardless of setting.
 *
 * - async:     the compression or decompression is submitted and returns
 *              immediately.  Completion interrupts are not used so
 *              the caller is responsible for polling the descriptor
 *              for completion.  This mode is applicable to only the
 *              async crypto interface and is ignored for anything
 *              else.
 *
 * - async_irq: the compression or decompression is submitted and
 *              returns immediately.  Completion interrupts are
 *              enabled so the caller can wait for the completion and
 *              yield to other threads.  When the compression or
 *              decompression completes, the completion is signaled
 *              and the caller awakened.  This mode is applicable to
 *              only the async crypto interface and is ignored for
 *              anything else.
 *
 * These modes can be set using the iaa_crypto sync_mode driver
 * attribute.
 */

/* Use async mode */
static bool async_mode;
/* Use interrupts */
static bool use_irq;

/**
 * set_iaa_sync_mode - Set IAA sync mode
 * @name: The name of the sync mode
 *
 * Make the IAA sync mode named @name the current sync mode used by
 * compression/decompression.
 */
static int set_iaa_sync_mode(const char *name)
{
	int ret = 0;

	if (sysfs_streq(name, "sync")) {
		async_mode = false;
		use_irq = false;
	} else if (sysfs_streq(name, "async")) {
		async_mode = true;
		use_irq = false;
	} else if (sysfs_streq(name, "async_irq")) {
		async_mode = true;
		use_irq = true;
	} else {
		ret = -EINVAL;
	}

	return ret;
}

static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
{
	int ret = 0;

	if (!async_mode && !use_irq)
		ret = sprintf(buf, "%s\n", "sync");
	else if (async_mode && !use_irq)
		ret = sprintf(buf, "%s\n", "async");
	else if (async_mode && use_irq)
		ret = sprintf(buf, "%s\n", "async_irq");

	return ret;
}

static ssize_t sync_mode_store(struct device_driver *driver,
			       const char *buf, size_t count)
{
	int ret = -EBUSY;

	mutex_lock(&iaa_devices_lock);

	if (iaa_crypto_enabled)
		goto out;

	ret = set_iaa_sync_mode(buf);
	if (ret == 0)
		ret = count;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
}

static DRIVER_ATTR_RW(sync_mode);

static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];

static int find_empty_iaa_compression_mode(void)
{
	int i = -EINVAL;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		if (iaa_compression_modes[i])
			continue;
		break;
	}

	return i;
}

static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
{
	struct iaa_compression_mode *mode;
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		mode = iaa_compression_modes[i];
		if (!mode)
			continue;

		if (!strcmp(mode->name, name)) {
			*idx = i;
			return iaa_compression_modes[i];
		}
	}

	return NULL;
}

static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
{
	kfree(mode->name);
	kfree(mode->ll_table);
	kfree(mode->d_table);

	kfree(mode);
}

/*
 * IAA Compression modes are defined by an ll_table and a d_table.
 * These tables are typically generated and captured using statistics
 * collected from running actual compress/decompress workloads.
 *
 * A module or other kernel code can add and remove compression modes
 * with a given name using the exported @add_iaa_compression_mode()
 * and @remove_iaa_compression_mode functions.
 *
 * When a new compression mode is added, the tables are saved in a
 * global compression mode list.  When IAA devices are added, a
 * per-IAA device dma mapping is created for each IAA device, for each
 * compression mode.  These are the tables used to do the actual
 * compression/decompression and are unmapped if/when the devices are
 * removed.  Currently, compression modes must be added before any
 * device is added, and removed after all devices have been removed.
 */

/**
 * remove_iaa_compression_mode - Remove an IAA compression mode
 * @name: The name the compression mode will be known as
 *
 * Remove the IAA compression mode named @name.
 */
void remove_iaa_compression_mode(const char *name)
{
	struct iaa_compression_mode *mode;
	int idx;

	mutex_lock(&iaa_devices_lock);

	if (!list_empty(&iaa_devices))
		goto out;

	mode = find_iaa_compression_mode(name, &idx);
	if (mode) {
		free_iaa_compression_mode(mode);
		iaa_compression_modes[idx] = NULL;
	}
out:
	mutex_unlock(&iaa_devices_lock);
}
EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);

/**
 * add_iaa_compression_mode - Add an IAA compression mode
 * @name: The name the compression mode will be known as
 * @ll_table: The ll table
 * @ll_table_size: The ll table size in bytes
 * @d_table: The d table
 * @d_table_size: The d table size in bytes
 * @init: Optional callback function to init the compression mode data
 * @free: Optional callback function to free the compression mode data
 *
 * Add a new IAA compression mode named @name.
 *
 * Returns 0 if successful, errcode otherwise.
 */
int add_iaa_compression_mode(const char *name,
			     const u32 *ll_table,
			     int ll_table_size,
			     const u32 *d_table,
			     int d_table_size,
			     iaa_dev_comp_init_fn_t init,
			     iaa_dev_comp_free_fn_t free)
{
	struct iaa_compression_mode *mode;
	int idx, ret = -ENOMEM;

	mutex_lock(&iaa_devices_lock);

	if (!list_empty(&iaa_devices)) {
		ret = -EBUSY;
		goto out;
	}

	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
	if (!mode)
		goto out;

	mode->name = kstrdup(name, GFP_KERNEL);
	if (!mode->name)
		goto free;

	if (ll_table) {
		mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL);
		if (!mode->ll_table)
			goto free;
		mode->ll_table_size = ll_table_size;
	}

	if (d_table) {
		mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL);
		if (!mode->d_table)
			goto free;
		mode->d_table_size = d_table_size;
	}

	mode->init = init;
	mode->free = free;

	idx = find_empty_iaa_compression_mode();
	if (idx < 0)
		goto free;

	pr_debug("IAA compression mode %s added at idx %d\n",
		 mode->name, idx);

	iaa_compression_modes[idx] = mode;

	ret = 0;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
free:
	free_iaa_compression_mode(mode);
	goto out;
}
EXPORT_SYMBOL_GPL(add_iaa_compression_mode);

static struct iaa_device_compression_mode *
get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
{
	return iaa_device->compression_modes[idx];
}

static void free_device_compression_mode(struct iaa_device *iaa_device,
					 struct iaa_device_compression_mode *device_mode)
{
	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
	struct device *dev = &iaa_device->idxd->pdev->dev;

	kfree(device_mode->name);

	if (device_mode->aecs_comp_table)
		dma_free_coherent(dev, size, device_mode->aecs_comp_table,
				  device_mode->aecs_comp_table_dma_addr);
	kfree(device_mode);
}

#define IDXD_OP_FLAG_AECS_RW_TGLS	0x400000
#define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
#define IAX_AECS_COMPRESS_FLAG	(IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
#define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
#define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
			   IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
			   IDXD_OP_FLAG_AECS_RW_TGLS)

static int check_completion(struct device *dev,
			    struct iax_completion_record *comp,
			    bool compress,
			    bool only_once);

static int init_device_compression_mode(struct iaa_device *iaa_device,
					struct iaa_compression_mode *mode,
					int idx, struct idxd_wq *wq)
{
	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
	struct device *dev = &iaa_device->idxd->pdev->dev;
	struct iaa_device_compression_mode *device_mode;
	int ret = -ENOMEM;

	device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
	if (!device_mode)
		return -ENOMEM;

	device_mode->name = kstrdup(mode->name, GFP_KERNEL);
	if (!device_mode->name)
		goto free;

	device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
							  &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
	if (!device_mode->aecs_comp_table)
		goto free;

	/* Add Huffman table to aecs */
	memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
	memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
	memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);

	if (mode->init) {
		ret = mode->init(device_mode);
		if (ret)
			goto free;
	}

	/* mode index should match iaa_compression_modes idx */
	iaa_device->compression_modes[idx] = device_mode;

	pr_debug("IAA %s compression mode initialized for iaa device %d\n",
		 mode->name, iaa_device->idxd->id);

	ret = 0;
out:
	return ret;
free:
	pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
		 mode->name, iaa_device->idxd->id);

	free_device_compression_mode(iaa_device, device_mode);
	goto out;
}

static int init_device_compression_modes(struct iaa_device *iaa_device,
					 struct idxd_wq *wq)
{
	struct iaa_compression_mode *mode;
	int i, ret = 0;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		mode = iaa_compression_modes[i];
		if (!mode)
			continue;

		ret = init_device_compression_mode(iaa_device, mode, i, wq);
		if (ret)
			break;
	}

	return ret;
}

static void remove_device_compression_modes(struct iaa_device *iaa_device)
{
	struct iaa_device_compression_mode *device_mode;
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		device_mode = iaa_device->compression_modes[i];
		if (!device_mode)
			continue;

		if (iaa_compression_modes[i]->free)
			iaa_compression_modes[i]->free(device_mode);
		free_device_compression_mode(iaa_device, device_mode);
		iaa_device->compression_modes[i] = NULL;
	}
}

static struct iaa_device *iaa_device_alloc(void)
{
	struct iaa_device *iaa_device;

	iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
	if (!iaa_device)
		return NULL;

	INIT_LIST_HEAD(&iaa_device->wqs);

	return iaa_device;
}

static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
{
	struct iaa_wq *iaa_wq;

	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
		if (iaa_wq->wq == wq)
			return true;
	}

	return false;
}

static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
{
	struct iaa_device *iaa_device;

	iaa_device = iaa_device_alloc();
	if (!iaa_device)
		return NULL;

	iaa_device->idxd = idxd;

	list_add_tail(&iaa_device->list, &iaa_devices);

	nr_iaa++;

	return iaa_device;
}

static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
{
	int ret = 0;

	ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
	if (ret)
		return ret;

	return ret;
}

static void del_iaa_device(struct iaa_device *iaa_device)
{
	list_del(&iaa_device->list);

	nr_iaa--;
}

static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
		      struct iaa_wq **new_wq)
{
	struct idxd_device *idxd = iaa_device->idxd;
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iaa_wq *iaa_wq;

	iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
	if (!iaa_wq)
		return -ENOMEM;

	iaa_wq->wq = wq;
	iaa_wq->iaa_device = iaa_device;
	idxd_wq_set_private(wq, iaa_wq);

	list_add_tail(&iaa_wq->list, &iaa_device->wqs);

	iaa_device->n_wq++;

	if (new_wq)
		*new_wq = iaa_wq;

	dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
		wq->id, iaa_device->idxd->id, iaa_device->n_wq);

	return 0;
}

static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
{
	struct idxd_device *idxd = iaa_device->idxd;
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iaa_wq *iaa_wq;

	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
		if (iaa_wq->wq == wq) {
			list_del(&iaa_wq->list);
			iaa_device->n_wq--;

			dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
				wq->id, iaa_device->idxd->id,
				iaa_device->n_wq, nr_iaa);

			if (iaa_device->n_wq == 0)
				del_iaa_device(iaa_device);
			break;
		}
	}
}

static void clear_wq_table(void)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		wq_table_clear_entry(cpu);

	pr_debug("cleared wq table\n");
}

static void free_iaa_device(struct iaa_device *iaa_device)
{
	if (!iaa_device)
		return;

	remove_device_compression_modes(iaa_device);
	kfree(iaa_device);
}

static void __free_iaa_wq(struct iaa_wq *iaa_wq)
{
	struct iaa_device *iaa_device;

	if (!iaa_wq)
		return;

	iaa_device = iaa_wq->iaa_device;
	if (iaa_device->n_wq == 0)
		free_iaa_device(iaa_wq->iaa_device);
}

static void free_iaa_wq(struct iaa_wq *iaa_wq)
{
	struct idxd_wq *wq;

	__free_iaa_wq(iaa_wq);

	wq = iaa_wq->wq;

	kfree(iaa_wq);
	idxd_wq_set_private(wq, NULL);
}

static int iaa_wq_get(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	int ret = 0;

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (iaa_wq && !iaa_wq->remove) {
		iaa_wq->ref++;
		ret = 0;
	} else {
		ret = -ENODEV;
	}
	spin_unlock(&idxd->dev_lock);

	return ret;
}

static int iaa_wq_put(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	bool free = false;
	int ret = 0;

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (iaa_wq) {
		iaa_wq->ref--;
		if (iaa_wq->ref == 0 && iaa_wq->remove) {
			idxd_wq_set_private(wq, NULL);
			free = true;
		}
		ret = 0;
	} else {
		ret = -ENODEV;
	}
	spin_unlock(&idxd->dev_lock);
	if (free) {
		__free_iaa_wq(iaa_wq);
		kfree(iaa_wq);
	}

	return ret;
}

static void free_wq_table(void)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		wq_table_free_entry(cpu);

	free_percpu(wq_table);

	pr_debug("freed wq table\n");
}

static int alloc_wq_table(int max_wqs)
{
	struct wq_table_entry *entry;
	int cpu;

	wq_table = alloc_percpu(struct wq_table_entry);
	if (!wq_table)
		return -ENOMEM;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		entry = per_cpu_ptr(wq_table, cpu);
		entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL);
		if (!entry->wqs) {
			free_wq_table();
			return -ENOMEM;
		}

		entry->max_wqs = max_wqs;
	}

	pr_debug("initialized wq table\n");

	return 0;
}

static int save_iaa_wq(struct idxd_wq *wq)
{
	struct iaa_device *iaa_device, *found = NULL;
	struct idxd_device *idxd;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		if (iaa_device->idxd == wq->idxd) {
			idxd = iaa_device->idxd;
			pdev = idxd->pdev;
			dev = &pdev->dev;
			/*
			 * Check to see that we don't already have this wq.
			 * Shouldn't happen but we don't control probing.
			 */
			if (iaa_has_wq(iaa_device, wq)) {
				dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
					iaa_device);
				goto out;
			}

			found = iaa_device;

			ret = add_iaa_wq(iaa_device, wq, NULL);
			if (ret)
				goto out;

			break;
		}
	}

	if (!found) {
		struct iaa_device *new_device;
		struct iaa_wq *new_wq;

		new_device = add_iaa_device(wq->idxd);
		if (!new_device) {
			ret = -ENOMEM;
			goto out;
		}

		ret = add_iaa_wq(new_device, wq, &new_wq);
		if (ret) {
			del_iaa_device(new_device);
			free_iaa_device(new_device);
			goto out;
		}

		ret = init_iaa_device(new_device, new_wq);
		if (ret) {
			del_iaa_wq(new_device, new_wq->wq);
			del_iaa_device(new_device);
			free_iaa_wq(new_wq);
			goto out;
		}
	}

	if (WARN_ON(nr_iaa == 0))
		return -EINVAL;

	cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
	if (!cpus_per_iaa)
		cpus_per_iaa = 1;
out:
	return 0;
}
*wq
)
815 struct iaa_device
*iaa_device
;
817 list_for_each_entry(iaa_device
, &iaa_devices
, list
) {
818 if (iaa_has_wq(iaa_device
, wq
)) {
819 del_iaa_wq(iaa_device
, wq
);
825 cpus_per_iaa
= (nr_nodes
* nr_cpus_per_node
) / nr_iaa
;

static int wq_table_add_wqs(int iaa, int cpu)
{
	struct iaa_device *iaa_device, *found_device = NULL;
	int ret = 0, cur_iaa = 0, n_wqs_added = 0;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		idxd = iaa_device->idxd;
		pdev = idxd->pdev;
		dev = &pdev->dev;

		if (cur_iaa != iaa) {
			cur_iaa++;
			continue;
		}

		found_device = iaa_device;
		dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
			found_device->idxd->id, cur_iaa);
		break;
	}

	if (!found_device) {
		found_device = list_first_entry_or_null(&iaa_devices,
							struct iaa_device, list);
		if (!found_device) {
			pr_debug("couldn't find any iaa devices with wqs!\n");
			ret = -EINVAL;
			goto out;
		}
		cur_iaa = 0;

		idxd = found_device->idxd;
		pdev = idxd->pdev;
		dev = &pdev->dev;
		dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
			found_device->idxd->id, cur_iaa);
	}

	list_for_each_entry(iaa_wq, &found_device->wqs, list) {
		wq_table_add(cpu, iaa_wq->wq);
		pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
			 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
		n_wqs_added++;
	}

	if (!n_wqs_added) {
		pr_debug("couldn't find any iaa wqs!\n");
		ret = -EINVAL;
		goto out;
	}
out:
	return ret;
}

/*
 * Rebalance the wq table so that given a cpu, it's easy to find the
 * closest IAA instance.  The idea is to try to choose the most
 * appropriate IAA instance for a caller and spread available
 * workqueues around to clients.
 */
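
/*
 * Worked example with illustrative numbers: nr_nodes = 2, nr_cpus_per_node =
 * 64 and nr_iaa = 8 give cpus_per_iaa = (2 * 64) / 8 = 16, so within each
 * node the loop below advances to the next IAA instance every 16 cpus.
 */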

static void rebalance_wq_table(void)
{
	const struct cpumask *node_cpus;
	int node, cpu, iaa = -1;

	if (nr_iaa == 0)
		return;

	pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
		 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);

	clear_wq_table();

	if (nr_iaa == 1) {
		for (cpu = 0; cpu < nr_cpus; cpu++) {
			if (WARN_ON(wq_table_add_wqs(0, cpu))) {
				pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
				return;
			}
		}

		return;
	}

	for_each_node_with_cpus(node) {
		node_cpus = cpumask_of_node(node);

		for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) {
			int node_cpu = cpumask_nth(cpu, node_cpus);

			if (WARN_ON(node_cpu >= nr_cpu_ids)) {
				pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
				return;
			}

			if ((cpu % cpus_per_iaa) == 0)
				iaa++;

			if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
				pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
				return;
			}
		}
	}
}

static inline int check_completion(struct device *dev,
				   struct iax_completion_record *comp,
				   bool compress,
				   bool only_once)
{
	char *op_str = compress ? "compress" : "decompress";
	int status_checks = 0;
	int ret = 0;

	while (!comp->status) {
		if (only_once)
			return -EAGAIN;
		cpu_relax();
		if (status_checks++ >= IAA_COMPLETION_TIMEOUT) {
			/* Something is wrong with the hw, disable it. */
			dev_err(dev, "%s completion timed out - "
				"assuming broken hw, iaa_crypto now DISABLED\n",
				op_str);
			iaa_crypto_enabled = false;
			ret = -ETIMEDOUT;
			goto out;
		}
	}

	if (comp->status != IAX_COMP_SUCCESS) {
		if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
			ret = -ETIMEDOUT;
			dev_dbg(dev, "%s timed out, size=0x%x\n",
				op_str, comp->output_size);
			update_completion_timeout_errs();
			goto out;
		}

		if (comp->status == IAA_ANALYTICS_ERROR &&
		    comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
			ret = -E2BIG;
			dev_dbg(dev, "compressed > uncompressed size,"
				" not compressing, size=0x%x\n",
				comp->output_size);
			update_completion_comp_buf_overflow_errs();
			goto out;
		}

		if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
			ret = -EOVERFLOW;
			goto out;
		}

		ret = -EINVAL;
		dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
			op_str, comp->status, comp->error_code, comp->output_size);
		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
		update_completion_einval_errs();

		goto out;
	}
out:
	return ret;
}

static int deflate_generic_decompress(struct acomp_req *req)
{
	void *src, *dst;
	int ret;

	src = kmap_local_page(sg_page(req->src)) + req->src->offset;
	dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;

	ret = crypto_comp_decompress(deflate_generic_tfm,
				     src, req->slen, dst, &req->dlen);

	kunmap_local(src);
	kunmap_local(dst);

	update_total_sw_decomp_calls();

	return ret;
}
*dev
, struct iaa_wq
*iaa_wq
,
1022 struct acomp_req
*req
,
1023 dma_addr_t
*src_addr
, dma_addr_t
*dst_addr
);
1025 static int iaa_compress_verify(struct crypto_tfm
*tfm
, struct acomp_req
*req
,
1027 dma_addr_t src_addr
, unsigned int slen
,
1028 dma_addr_t dst_addr
, unsigned int *dlen
,
1029 u32 compression_crc
);

static void iaa_desc_complete(struct idxd_desc *idxd_desc,
			      enum idxd_complete_type comp_type,
			      bool free_desc, void *__ctx,
			      u32 *status)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *compression_ctx;
	struct crypto_ctx *ctx = __ctx;
	struct iaa_device *iaa_device;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret, err = 0;

	compression_ctx = crypto_tfm_ctx(ctx->tfm);

	iaa_wq = idxd_wq_get_private(idxd_desc->wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device,
								   compression_ctx->mode);
	dev_dbg(dev, "%s: compression mode %s,"
		" ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
		active_compression_mode->name,
		ctx->src_addr, ctx->dst_addr);

	ret = check_completion(dev, idxd_desc->iax_completion,
			       ctx->compress, false);
	if (ret) {
		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
		if (!ctx->compress &&
		    idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
			pr_warn("%s: falling back to deflate-generic decompress, "
				"analytics error code %x\n", __func__,
				idxd_desc->iax_completion->error_code);
			ret = deflate_generic_decompress(ctx->req);
			if (ret) {
				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
					__func__, ret);
				err = -EIO;
				goto err;
			}
		} else {
			err = -EIO;
			goto err;
		}
	} else {
		ctx->req->dlen = idxd_desc->iax_completion->output_size;
	}

	/* Update stats */
	if (ctx->compress) {
		update_total_comp_bytes_out(ctx->req->dlen);
		update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
	} else {
		update_total_decomp_bytes_in(ctx->req->slen);
		update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen);
	}

	if (ctx->compress && compression_ctx->verify_compress) {
		dma_addr_t src_addr, dst_addr;
		u32 compression_crc;

		compression_crc = idxd_desc->iax_completion->crc;

		ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
		if (ret) {
			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
			err = -EIO;
			goto out;
		}

		ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
					  ctx->req->slen, dst_addr, &ctx->req->dlen,
					  compression_crc);
		if (ret) {
			dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
			err = -EIO;
		}

		dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
		dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);

		goto out;
	}
err:
	dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
	dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
out:
	if (ret != 0)
		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);

	if (ctx->req->base.complete)
		acomp_request_complete(ctx->req, err);

	if (free_desc)
		idxd_free_desc(idxd_desc->wq, idxd_desc);
	iaa_wq_put(idxd_desc->wq);
}

static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
			struct idxd_wq *wq,
			dma_addr_t src_addr, unsigned int slen,
			dma_addr_t dst_addr, unsigned int *dlen,
			u32 *compression_crc,
			bool disable_async)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
		IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_COMPRESS;
	desc->compr_flags = IAA_COMP_FLAGS;
	desc->priv = 0;

	desc->src1_addr = (u64)src_addr;
	desc->src1_size = slen;
	desc->dst_addr = (u64)dst_addr;
	desc->max_dst_size = *dlen;
	desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
	desc->src2_size = sizeof(struct aecs_comp_table_record);
	desc->completion_addr = idxd_desc->compl_dma;

	if (ctx->use_irq && !disable_async) {
		desc->flags |= IDXD_OP_FLAG_RCI;

		idxd_desc->crypto.req = req;
		idxd_desc->crypto.tfm = tfm;
		idxd_desc->crypto.src_addr = src_addr;
		idxd_desc->crypto.dst_addr = dst_addr;
		idxd_desc->crypto.compress = true;

		dev_dbg(dev, "%s use_async_irq: compression mode %s,"
			" src_addr %llx, dst_addr %llx\n", __func__,
			active_compression_mode->name,
			src_addr, dst_addr);
	} else if (ctx->async_mode && !disable_async)
		req->base.data = idxd_desc;

	dev_dbg(dev, "%s: compression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
		goto err;
	}

	/* Update stats */
	update_total_comp_calls();
	update_wq_comp_calls(wq);

	if (ctx->async_mode && !disable_async) {
		ret = -EINPROGRESS;
		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
		goto out;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, true, false);
	if (ret) {
		dev_dbg(dev, "check_completion failed ret=%d\n", ret);
		goto err;
	}

	*dlen = idxd_desc->iax_completion->output_size;

	/* Update stats */
	update_total_comp_bytes_out(*dlen);
	update_wq_comp_bytes(wq, *dlen);

	*compression_crc = idxd_desc->iax_completion->crc;

	if (!ctx->async_mode || disable_async)
		idxd_free_desc(wq, idxd_desc);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);

	goto out;
}

static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
				struct acomp_req *req,
				dma_addr_t *src_addr, dma_addr_t *dst_addr)
{
	int ret = 0;
	int nr_sgs;

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	*src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
		goto out;
	}
	*dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));
out:
	return ret;
}

static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
			       struct idxd_wq *wq,
			       dma_addr_t src_addr, unsigned int slen,
			       dma_addr_t dst_addr, unsigned int *dlen,
			       u32 compression_crc)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa compress failed: ret=%ld\n",
			PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	/* Verify (optional) - decompress and check crc, suppress dest write */

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_DECOMPRESS;
	desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
	desc->priv = 0;

	desc->src1_addr = (u64)dst_addr;
	desc->src1_size = *dlen;
	desc->dst_addr = (u64)src_addr;
	desc->max_dst_size = slen;
	desc->completion_addr = idxd_desc->compl_dma;

	dev_dbg(dev, "(verify) compression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n",
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
		goto err;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
	if (ret) {
		dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
		goto err;
	}

	if (compression_crc != idxd_desc->iax_completion->crc) {
		ret = -EINVAL;
		dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
			" comp=0x%x, decomp=0x%x\n", compression_crc,
			idxd_desc->iax_completion->crc);
		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
			       8, 1, idxd_desc->iax_completion, 64, 0);
		goto err;
	}

	idxd_free_desc(wq, idxd_desc);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);

	goto out;
}

static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
			  struct idxd_wq *wq,
			  dma_addr_t src_addr, unsigned int slen,
			  dma_addr_t dst_addr, unsigned int *dlen,
			  bool disable_async)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
			PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_DECOMPRESS;
	desc->max_dst_size = PAGE_SIZE;
	desc->decompr_flags = IAA_DECOMP_FLAGS;
	desc->priv = 0;

	desc->src1_addr = (u64)src_addr;
	desc->dst_addr = (u64)dst_addr;
	desc->max_dst_size = *dlen;
	desc->src1_size = slen;
	desc->completion_addr = idxd_desc->compl_dma;

	if (ctx->use_irq && !disable_async) {
		desc->flags |= IDXD_OP_FLAG_RCI;

		idxd_desc->crypto.req = req;
		idxd_desc->crypto.tfm = tfm;
		idxd_desc->crypto.src_addr = src_addr;
		idxd_desc->crypto.dst_addr = dst_addr;
		idxd_desc->crypto.compress = false;

		dev_dbg(dev, "%s: use_async_irq compression mode %s,"
			" src_addr %llx, dst_addr %llx\n", __func__,
			active_compression_mode->name,
			src_addr, dst_addr);
	} else if (ctx->async_mode && !disable_async)
		req->base.data = idxd_desc;

	dev_dbg(dev, "%s: decompression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
		goto err;
	}

	/* Update stats */
	update_total_decomp_calls();
	update_wq_decomp_calls(wq);

	if (ctx->async_mode && !disable_async) {
		ret = -EINPROGRESS;
		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
		goto out;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
	if (ret) {
		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
		if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
			pr_warn("%s: falling back to deflate-generic decompress, "
				"analytics error code %x\n", __func__,
				idxd_desc->iax_completion->error_code);
			ret = deflate_generic_decompress(req);
			if (ret) {
				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
					__func__, ret);
				goto err;
			}
		} else {
			goto err;
		}
	} else {
		req->dlen = idxd_desc->iax_completion->output_size;
	}

	*dlen = req->dlen;

	if (!ctx->async_mode || disable_async)
		idxd_free_desc(wq, idxd_desc);

	/* Update stats */
	update_total_decomp_bytes_in(slen);
	update_wq_decomp_bytes(wq, slen);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);

	goto out;
}

static int iaa_comp_acompress(struct acomp_req *req)
{
	struct iaa_compression_ctx *compression_ctx;
	struct crypto_tfm *tfm = req->base.tfm;
	dma_addr_t src_addr, dst_addr;
	bool disable_async = false;
	int nr_sgs, cpu, ret = 0;
	struct iaa_wq *iaa_wq;
	u32 compression_crc;
	struct idxd_wq *wq;
	struct device *dev;
	int order = -1;

	compression_ctx = crypto_tfm_ctx(tfm);

	if (!iaa_crypto_enabled) {
		pr_debug("iaa_crypto disabled, not compressing\n");
		return -ENODEV;
	}

	if (!req->src || !req->slen) {
		pr_debug("invalid src, not compressing\n");
		return -EINVAL;
	}

	cpu = get_cpu();
	wq = wq_table_next_wq(cpu);
	put_cpu();
	if (!wq) {
		pr_debug("no wq configured for cpu=%d\n", cpu);
		return -ENODEV;
	}

	ret = iaa_wq_get(wq);
	if (ret) {
		pr_debug("no wq available for cpu=%d\n", cpu);
		return -ENODEV;
	}

	iaa_wq = idxd_wq_get_private(wq);

	if (!req->dst) {
		gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;

		/* incompressible data will always be < 2 * slen */
		req->dlen = 2 * req->slen;
		order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
		req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
		if (!req->dst) {
			ret = -ENOMEM;
			order = -1;
			goto out;
		}
		disable_async = true;
	}

	dev = &wq->idxd->pdev->dev;

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto err_map_dst;
	}
	dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));

	ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
			   &req->dlen, &compression_crc, disable_async);
	if (ret == -EINPROGRESS)
		return ret;

	if (!ret && compression_ctx->verify_compress) {
		ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
		if (ret) {
			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
			goto out;
		}

		ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
					  dst_addr, &req->dlen, compression_crc);
		if (ret)
			dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);

		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);

		goto out;
	}

	if (ret)
		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
err_map_dst:
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
	iaa_wq_put(wq);

	if (order >= 0)
		sgl_free_order(req->dst, order);

	return ret;
}

static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
{
	gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
		GFP_KERNEL : GFP_ATOMIC;
	struct crypto_tfm *tfm = req->base.tfm;
	dma_addr_t src_addr, dst_addr;
	int nr_sgs, cpu, ret = 0;
	struct iaa_wq *iaa_wq;
	struct device *dev;
	struct idxd_wq *wq;
	int order = -1;

	cpu = get_cpu();
	wq = wq_table_next_wq(cpu);
	put_cpu();
	if (!wq) {
		pr_debug("no wq configured for cpu=%d\n", cpu);
		return -ENODEV;
	}

	ret = iaa_wq_get(wq);
	if (ret) {
		pr_debug("no wq available for cpu=%d\n", cpu);
		return -ENODEV;
	}

	iaa_wq = idxd_wq_get_private(wq);

	dev = &wq->idxd->pdev->dev;

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	req->dlen = 4 * req->slen; /* start with ~avg comp ratio */
alloc_dest:
	order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
	req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
	if (!req->dst) {
		ret = -ENOMEM;
		order = -1;
		goto out;
	}

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto err_map_dst;
	}

	dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));
	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
			     dst_addr, &req->dlen, true);
	if (ret == -EOVERFLOW) {
		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
		req->dlen *= 2;
		if (req->dlen > CRYPTO_ACOMP_DST_MAX)
			goto err_map_dst;
		goto alloc_dest;
	}

	if (ret != 0)
		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
err_map_dst:
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
	iaa_wq_put(wq);

	if (order >= 0)
		sgl_free_order(req->dst, order);

	return ret;
}

static int iaa_comp_adecompress(struct acomp_req *req)
{
	struct crypto_tfm *tfm = req->base.tfm;
	dma_addr_t src_addr, dst_addr;
	int nr_sgs, cpu, ret = 0;
	struct iaa_wq *iaa_wq;
	struct device *dev;
	struct idxd_wq *wq;

	if (!iaa_crypto_enabled) {
		pr_debug("iaa_crypto disabled, not decompressing\n");
		return -ENODEV;
	}

	if (!req->src || !req->slen) {
		pr_debug("invalid src, not decompressing\n");
		return -EINVAL;
	}

	if (!req->dst)
		return iaa_comp_adecompress_alloc_dest(req);

	cpu = get_cpu();
	wq = wq_table_next_wq(cpu);
	put_cpu();
	if (!wq) {
		pr_debug("no wq configured for cpu=%d\n", cpu);
		return -ENODEV;
	}

	ret = iaa_wq_get(wq);
	if (ret) {
		pr_debug("no wq available for cpu=%d\n", cpu);
		return -ENODEV;
	}

	iaa_wq = idxd_wq_get_private(wq);

	dev = &wq->idxd->pdev->dev;

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto err_map_dst;
	}
	dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));

	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
			     dst_addr, &req->dlen, false);
	if (ret == -EINPROGRESS)
		return ret;

	if (ret != 0)
		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
err_map_dst:
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
	iaa_wq_put(wq);

	return ret;
}

static void compression_ctx_init(struct iaa_compression_ctx *ctx)
{
	ctx->verify_compress = iaa_verify_compress;
	ctx->async_mode = async_mode;
	ctx->use_irq = use_irq;
}

static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
{
	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);

	compression_ctx_init(ctx);

	ctx->mode = IAA_MODE_FIXED;

	return 0;
}

static void dst_free(struct scatterlist *sgl)
{
	/*
	 * Called for req->dst = NULL cases but we free elsewhere
	 * using sgl_free_order().
	 */
}

static struct acomp_alg iaa_acomp_fixed_deflate = {
	.init			= iaa_comp_init_fixed,
	.compress		= iaa_comp_acompress,
	.decompress		= iaa_comp_adecompress,
	.dst_free		= dst_free,
	.base			= {
		.cra_name		= "deflate",
		.cra_driver_name	= "deflate-iaa",
		.cra_flags		= CRYPTO_ALG_ASYNC,
		.cra_ctxsize		= sizeof(struct iaa_compression_ctx),
		.cra_module		= THIS_MODULE,
		.cra_priority		= IAA_ALG_PRIORITY,
	}
};

static int iaa_register_compression_device(void)
{
	int ret;

	ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
	if (ret) {
		pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
		goto out;
	}

	iaa_crypto_registered = true;
out:
	return ret;
}

static int iaa_unregister_compression_device(void)
{
	if (iaa_crypto_registered)
		crypto_unregister_acomp(&iaa_acomp_fixed_deflate);

	return 0;
}

static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct idxd_driver_data *data = idxd->data;
	struct device *dev = &idxd_dev->conf_dev;
	bool first_wq = false;
	int ret = 0;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -ENXIO;

	if (data->type != IDXD_TYPE_IAX)
		return -ENODEV;

	mutex_lock(&wq->wq_lock);

	if (idxd_wq_get_private(wq)) {
		mutex_unlock(&wq->wq_lock);
		return -EBUSY;
	}

	if (!idxd_wq_driver_name_match(wq, dev)) {
		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
			idxd->id, wq->id, wq->driver_name, dev->driver->name);
		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
		ret = -ENODEV;
		goto err;
	}

	wq->type = IDXD_WQT_KERNEL;

	ret = idxd_drv_enable_wq(wq);
	if (ret < 0) {
		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
			idxd->id, wq->id, ret);
		ret = -ENXIO;
		goto err;
	}

	mutex_lock(&iaa_devices_lock);

	if (list_empty(&iaa_devices)) {
		ret = alloc_wq_table(wq->idxd->max_wqs);
		if (ret)
			goto err_alloc;
		first_wq = true;
	}

	ret = save_iaa_wq(wq);
	if (ret)
		goto err_save;

	rebalance_wq_table();

	if (first_wq) {
		iaa_crypto_enabled = true;
		ret = iaa_register_compression_device();
		if (ret != 0) {
			iaa_crypto_enabled = false;
			dev_dbg(dev, "IAA compression device registration failed\n");
			goto err_register;
		}
		try_module_get(THIS_MODULE);

		pr_info("iaa_crypto now ENABLED\n");
	}

	mutex_unlock(&iaa_devices_lock);
out:
	mutex_unlock(&wq->wq_lock);

	return ret;

err_register:
	remove_iaa_wq(wq);
	free_iaa_wq(idxd_wq_get_private(wq));
err_save:
	if (first_wq)
		free_wq_table();
err_alloc:
	mutex_unlock(&iaa_devices_lock);
	idxd_drv_disable_wq(wq);
err:
	wq->type = IDXD_WQT_NONE;

	goto out;
}

static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	bool free = false;

	idxd_wq_quiesce(wq);

	mutex_lock(&wq->wq_lock);
	mutex_lock(&iaa_devices_lock);

	remove_iaa_wq(wq);

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (!iaa_wq) {
		spin_unlock(&idxd->dev_lock);
		pr_err("%s: no iaa_wq available to remove\n", __func__);
		goto out;
	}

	if (iaa_wq->ref) {
		iaa_wq->remove = true;
	} else {
		wq = iaa_wq->wq;
		idxd_wq_set_private(wq, NULL);
		free = true;
	}
	spin_unlock(&idxd->dev_lock);
	if (free) {
		__free_iaa_wq(iaa_wq);
		kfree(iaa_wq);
	}

	idxd_drv_disable_wq(wq);
	rebalance_wq_table();

	if (nr_iaa == 0) {
		iaa_crypto_enabled = false;
		free_wq_table();
		module_put(THIS_MODULE);

		pr_info("iaa_crypto now DISABLED\n");
	}
out:
	mutex_unlock(&iaa_devices_lock);
	mutex_unlock(&wq->wq_lock);
}

static enum idxd_dev_type dev_types[] = {
	IDXD_DEV_WQ,
	IDXD_DEV_NONE,
};

static struct idxd_device_driver iaa_crypto_driver = {
	.probe = iaa_crypto_probe,
	.remove = iaa_crypto_remove,
	.name = IDXD_SUBDRIVER_NAME,
	.type = dev_types,
	.desc_complete = iaa_desc_complete,
};

static int __init iaa_crypto_init_module(void)
{
	int ret = 0;
	int node;

	nr_cpus = num_possible_cpus();
	for_each_node_with_cpus(node)
		nr_nodes++;
	if (!nr_nodes) {
		pr_err("IAA couldn't find any nodes with cpus\n");
		return -ENODEV;
	}
	nr_cpus_per_node = nr_cpus / nr_nodes;

	if (crypto_has_comp("deflate-generic", 0, 0))
		deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);

	if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
		pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
		       "deflate-generic", PTR_ERR(deflate_generic_tfm));
		return -ENOMEM;
	}

	ret = iaa_aecs_init_fixed();
	if (ret < 0) {
		pr_debug("IAA fixed compression mode init failed\n");
		goto err_aecs_init;
	}

	ret = idxd_driver_register(&iaa_crypto_driver);
	if (ret) {
		pr_debug("IAA wq sub-driver registration failed\n");
		goto err_driver_reg;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_verify_compress);
	if (ret) {
		pr_debug("IAA verify_compress attr creation failed\n");
		goto err_verify_attr_create;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_sync_mode);
	if (ret) {
		pr_debug("IAA sync mode attr creation failed\n");
		goto err_sync_attr_create;
	}

	if (iaa_crypto_debugfs_init())
		pr_warn("debugfs init failed, stats not available\n");

	pr_debug("initialized\n");
out:
	return ret;

err_sync_attr_create:
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
err_verify_attr_create:
	idxd_driver_unregister(&iaa_crypto_driver);
err_driver_reg:
	iaa_aecs_cleanup_fixed();
err_aecs_init:
	crypto_free_comp(deflate_generic_tfm);

	goto out;
}

static void __exit iaa_crypto_cleanup_module(void)
{
	if (iaa_unregister_compression_device())
		pr_debug("IAA compression device unregister failed\n");

	iaa_crypto_debugfs_cleanup();
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_sync_mode);
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
	idxd_driver_unregister(&iaa_crypto_driver);
	iaa_aecs_cleanup_fixed();
	crypto_free_comp(deflate_generic_tfm);

	pr_debug("cleaned up\n");
}

MODULE_IMPORT_NS("IDXD");
MODULE_LICENSE("GPL");
MODULE_ALIAS_IDXD_DEVICE(0);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");

module_init(iaa_crypto_init_module);
module_exit(iaa_crypto_cleanup_module);