// SPDX-License-Identifier: GPL-2.0
/*
 * Data Object Exchange
 *	PCIe r6.0, sec 6.30 DOE
 *
 * Copyright (C) 2021 Huawei
 *	Jonathan Cameron <Jonathan.Cameron@huawei.com>
 *
 * Copyright (C) 2022 Intel Corporation
 *	Ira Weiny <ira.weiny@intel.com>
 */
13 #define dev_fmt(fmt) "DOE: " fmt
15 #include <linux/bitfield.h>
16 #include <linux/delay.h>
17 #include <linux/jiffies.h>
18 #include <linux/mutex.h>
19 #include <linux/pci.h>
20 #include <linux/pci-doe.h>
21 #include <linux/workqueue.h>
25 #define PCI_DOE_PROTOCOL_DISCOVERY 0
27 /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
28 #define PCI_DOE_TIMEOUT HZ
29 #define PCI_DOE_POLL_INTERVAL (PCI_DOE_TIMEOUT / 128)
31 #define PCI_DOE_FLAG_CANCEL 0
32 #define PCI_DOE_FLAG_DEAD 1
34 /* Max data object length is 2^18 dwords */
35 #define PCI_DOE_MAX_LENGTH (1 << 18)
38 * struct pci_doe_mb - State for a single DOE mailbox
40 * This state is used to manage a single DOE mailbox capability. All fields
41 * should be considered opaque to the consumers and the structure passed into
42 * the helpers below after being created by pci_doe_create_mb().
44 * @pdev: PCI device this mailbox belongs to
45 * @cap_offset: Capability offset
46 * @prots: Array of protocols supported (encoded as long values)
47 * @wq: Wait queue for work item
48 * @work_queue: Queue of pci_doe_work items
49 * @flags: Bit array of PCI_DOE_FLAG_* flags
57 struct workqueue_struct
*work_queue
;
61 struct pci_doe_protocol
{
67 * struct pci_doe_task - represents a single query/response
70 * @request_pl: The request payload
71 * @request_pl_sz: Size of the request payload (bytes)
72 * @response_pl: The response payload
73 * @response_pl_sz: Size of the response payload (bytes)
74 * @rv: Return value. Length of received response or error (bytes)
75 * @complete: Called when task is complete
76 * @private: Private data for the consumer
77 * @work: Used internally by the mailbox
78 * @doe_mb: Used internally by the mailbox
81 struct pci_doe_protocol prot
;
82 const __le32
*request_pl
;
85 size_t response_pl_sz
;
87 void (*complete
)(struct pci_doe_task
*task
);
90 /* initialized by pci_doe_submit_task() */
91 struct work_struct work
;
92 struct pci_doe_mb
*doe_mb
;
95 static int pci_doe_wait(struct pci_doe_mb
*doe_mb
, unsigned long timeout
)
97 if (wait_event_timeout(doe_mb
->wq
,
98 test_bit(PCI_DOE_FLAG_CANCEL
, &doe_mb
->flags
),
104 static void pci_doe_write_ctrl(struct pci_doe_mb
*doe_mb
, u32 val
)
106 struct pci_dev
*pdev
= doe_mb
->pdev
;
107 int offset
= doe_mb
->cap_offset
;
109 pci_write_config_dword(pdev
, offset
+ PCI_DOE_CTRL
, val
);
112 static int pci_doe_abort(struct pci_doe_mb
*doe_mb
)
114 struct pci_dev
*pdev
= doe_mb
->pdev
;
115 int offset
= doe_mb
->cap_offset
;
116 unsigned long timeout_jiffies
;
118 pci_dbg(pdev
, "[%x] Issuing Abort\n", offset
);
120 timeout_jiffies
= jiffies
+ PCI_DOE_TIMEOUT
;
121 pci_doe_write_ctrl(doe_mb
, PCI_DOE_CTRL_ABORT
);
127 rc
= pci_doe_wait(doe_mb
, PCI_DOE_POLL_INTERVAL
);
130 pci_read_config_dword(pdev
, offset
+ PCI_DOE_STATUS
, &val
);
133 if (!FIELD_GET(PCI_DOE_STATUS_ERROR
, val
) &&
134 !FIELD_GET(PCI_DOE_STATUS_BUSY
, val
))
137 } while (!time_after(jiffies
, timeout_jiffies
));
139 /* Abort has timed out and the MB is dead */
140 pci_err(pdev
, "[%x] ABORT timed out\n", offset
);
144 static int pci_doe_send_req(struct pci_doe_mb
*doe_mb
,
145 struct pci_doe_task
*task
)
147 struct pci_dev
*pdev
= doe_mb
->pdev
;
148 int offset
= doe_mb
->cap_offset
;
149 unsigned long timeout_jiffies
;
150 size_t length
, remainder
;
155 * Check the DOE busy bit is not set. If it is set, this could indicate
156 * someone other than Linux (e.g. firmware) is using the mailbox. Note
157 * it is expected that firmware and OS will negotiate access rights via
158 * an, as yet to be defined, method.
160 * Wait up to one PCI_DOE_TIMEOUT period to allow the prior command to
161 * finish. Otherwise, simply error out as unable to field the request.
163 * PCIe r6.2 sec 6.30.3 states no interrupt is raised when the DOE Busy
164 * bit is cleared, so polling here is our best option for the moment.
166 timeout_jiffies
= jiffies
+ PCI_DOE_TIMEOUT
;
168 pci_read_config_dword(pdev
, offset
+ PCI_DOE_STATUS
, &val
);
169 } while (FIELD_GET(PCI_DOE_STATUS_BUSY
, val
) &&
170 !time_after(jiffies
, timeout_jiffies
));
172 if (FIELD_GET(PCI_DOE_STATUS_BUSY
, val
))
175 if (FIELD_GET(PCI_DOE_STATUS_ERROR
, val
))
178 /* Length is 2 DW of header + length of payload in DW */
179 length
= 2 + DIV_ROUND_UP(task
->request_pl_sz
, sizeof(__le32
));
180 if (length
> PCI_DOE_MAX_LENGTH
)
182 if (length
== PCI_DOE_MAX_LENGTH
)
185 /* Write DOE Header */
186 val
= FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID
, task
->prot
.vid
) |
187 FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE
, task
->prot
.type
);
188 pci_write_config_dword(pdev
, offset
+ PCI_DOE_WRITE
, val
);
189 pci_write_config_dword(pdev
, offset
+ PCI_DOE_WRITE
,
190 FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH
,
194 for (i
= 0; i
< task
->request_pl_sz
/ sizeof(__le32
); i
++)
195 pci_write_config_dword(pdev
, offset
+ PCI_DOE_WRITE
,
196 le32_to_cpu(task
->request_pl
[i
]));
198 /* Write last payload dword */
199 remainder
= task
->request_pl_sz
% sizeof(__le32
);
202 memcpy(&val
, &task
->request_pl
[i
], remainder
);
204 pci_write_config_dword(pdev
, offset
+ PCI_DOE_WRITE
, val
);
207 pci_doe_write_ctrl(doe_mb
, PCI_DOE_CTRL_GO
);
212 static bool pci_doe_data_obj_ready(struct pci_doe_mb
*doe_mb
)
214 struct pci_dev
*pdev
= doe_mb
->pdev
;
215 int offset
= doe_mb
->cap_offset
;
218 pci_read_config_dword(pdev
, offset
+ PCI_DOE_STATUS
, &val
);
219 if (FIELD_GET(PCI_DOE_STATUS_DATA_OBJECT_READY
, val
))
224 static int pci_doe_recv_resp(struct pci_doe_mb
*doe_mb
, struct pci_doe_task
*task
)
226 size_t length
, payload_length
, remainder
, received
;
227 struct pci_dev
*pdev
= doe_mb
->pdev
;
228 int offset
= doe_mb
->cap_offset
;
232 /* Read the first dword to get the protocol */
233 pci_read_config_dword(pdev
, offset
+ PCI_DOE_READ
, &val
);
234 if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID
, val
) != task
->prot
.vid
) ||
235 (FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE
, val
) != task
->prot
.type
)) {
236 dev_err_ratelimited(&pdev
->dev
, "[%x] expected [VID, Protocol] = [%04x, %02x], got [%04x, %02x]\n",
237 doe_mb
->cap_offset
, task
->prot
.vid
, task
->prot
.type
,
238 FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID
, val
),
239 FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE
, val
));
243 pci_write_config_dword(pdev
, offset
+ PCI_DOE_READ
, 0);
244 /* Read the second dword to get the length */
245 pci_read_config_dword(pdev
, offset
+ PCI_DOE_READ
, &val
);
246 pci_write_config_dword(pdev
, offset
+ PCI_DOE_READ
, 0);
248 length
= FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH
, val
);
249 /* A value of 0x0 indicates max data object length */
251 length
= PCI_DOE_MAX_LENGTH
;
255 /* First 2 dwords have already been read */
257 received
= task
->response_pl_sz
;
258 payload_length
= DIV_ROUND_UP(task
->response_pl_sz
, sizeof(__le32
));
259 remainder
= task
->response_pl_sz
% sizeof(__le32
);
261 /* remainder signifies number of data bytes in last payload dword */
263 remainder
= sizeof(__le32
);
265 if (length
< payload_length
) {
266 received
= length
* sizeof(__le32
);
267 payload_length
= length
;
268 remainder
= sizeof(__le32
);
271 if (payload_length
) {
272 /* Read all payload dwords except the last */
273 for (; i
< payload_length
- 1; i
++) {
274 pci_read_config_dword(pdev
, offset
+ PCI_DOE_READ
,
276 task
->response_pl
[i
] = cpu_to_le32(val
);
277 pci_write_config_dword(pdev
, offset
+ PCI_DOE_READ
, 0);
280 /* Read last payload dword */
281 pci_read_config_dword(pdev
, offset
+ PCI_DOE_READ
, &val
);
283 memcpy(&task
->response_pl
[i
], &val
, remainder
);
284 /* Prior to the last ack, ensure Data Object Ready */
285 if (!pci_doe_data_obj_ready(doe_mb
))
287 pci_write_config_dword(pdev
, offset
+ PCI_DOE_READ
, 0);
291 /* Flush excess length */
292 for (; i
< length
; i
++) {
293 pci_read_config_dword(pdev
, offset
+ PCI_DOE_READ
, &val
);
294 pci_write_config_dword(pdev
, offset
+ PCI_DOE_READ
, 0);
297 /* Final error check to pick up on any since Data Object Ready */
298 pci_read_config_dword(pdev
, offset
+ PCI_DOE_STATUS
, &val
);
299 if (FIELD_GET(PCI_DOE_STATUS_ERROR
, val
))
305 static void signal_task_complete(struct pci_doe_task
*task
, int rv
)
308 destroy_work_on_stack(&task
->work
);
309 task
->complete(task
);
312 static void signal_task_abort(struct pci_doe_task
*task
, int rv
)
314 struct pci_doe_mb
*doe_mb
= task
->doe_mb
;
315 struct pci_dev
*pdev
= doe_mb
->pdev
;
317 if (pci_doe_abort(doe_mb
)) {
319 * If the device can't process an abort; set the mailbox dead
320 * - no more submissions
322 pci_err(pdev
, "[%x] Abort failed marking mailbox dead\n",
324 set_bit(PCI_DOE_FLAG_DEAD
, &doe_mb
->flags
);
326 signal_task_complete(task
, rv
);
329 static void doe_statemachine_work(struct work_struct
*work
)
331 struct pci_doe_task
*task
= container_of(work
, struct pci_doe_task
,
333 struct pci_doe_mb
*doe_mb
= task
->doe_mb
;
334 struct pci_dev
*pdev
= doe_mb
->pdev
;
335 int offset
= doe_mb
->cap_offset
;
336 unsigned long timeout_jiffies
;
340 if (test_bit(PCI_DOE_FLAG_DEAD
, &doe_mb
->flags
)) {
341 signal_task_complete(task
, -EIO
);
346 rc
= pci_doe_send_req(doe_mb
, task
);
349 * The specification does not provide any guidance on how to
350 * resolve conflicting requests from other entities.
351 * Furthermore, it is likely that busy will not be detected
352 * most of the time. Flag any detection of status busy with an
356 dev_err_ratelimited(&pdev
->dev
, "[%x] busy detected; another entity is sending conflicting requests\n",
358 signal_task_abort(task
, rc
);
362 timeout_jiffies
= jiffies
+ PCI_DOE_TIMEOUT
;
363 /* Poll for response */
365 pci_read_config_dword(pdev
, offset
+ PCI_DOE_STATUS
, &val
);
366 if (FIELD_GET(PCI_DOE_STATUS_ERROR
, val
)) {
367 signal_task_abort(task
, -EIO
);
371 if (!FIELD_GET(PCI_DOE_STATUS_DATA_OBJECT_READY
, val
)) {
372 if (time_after(jiffies
, timeout_jiffies
)) {
373 signal_task_abort(task
, -EIO
);
376 rc
= pci_doe_wait(doe_mb
, PCI_DOE_POLL_INTERVAL
);
378 signal_task_abort(task
, rc
);
384 rc
= pci_doe_recv_resp(doe_mb
, task
);
386 signal_task_abort(task
, rc
);
390 signal_task_complete(task
, rc
);
393 static void pci_doe_task_complete(struct pci_doe_task
*task
)
395 complete(task
->private);
398 static int pci_doe_discovery(struct pci_doe_mb
*doe_mb
, u8 capver
, u8
*index
, u16
*vid
,
401 u32 request_pl
= FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX
,
403 FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_VER
,
404 (capver
>= 2) ? 2 : 0);
405 __le32 request_pl_le
= cpu_to_le32(request_pl
);
406 __le32 response_pl_le
;
410 rc
= pci_doe(doe_mb
, PCI_VENDOR_ID_PCI_SIG
, PCI_DOE_PROTOCOL_DISCOVERY
,
411 &request_pl_le
, sizeof(request_pl_le
),
412 &response_pl_le
, sizeof(response_pl_le
));
416 if (rc
!= sizeof(response_pl_le
))
419 response_pl
= le32_to_cpu(response_pl_le
);
420 *vid
= FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID
, response_pl
);
421 *protocol
= FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL
,
423 *index
= FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX
,
429 static void *pci_doe_xa_prot_entry(u16 vid
, u8 prot
)
431 return xa_mk_value((vid
<< 8) | prot
);
434 static int pci_doe_cache_protocols(struct pci_doe_mb
*doe_mb
)
440 pci_read_config_dword(doe_mb
->pdev
, doe_mb
->cap_offset
, &hdr
);
447 rc
= pci_doe_discovery(doe_mb
, PCI_EXT_CAP_VER(hdr
), &index
,
452 pci_dbg(doe_mb
->pdev
,
453 "[%x] Found protocol %d vid: %x prot: %x\n",
454 doe_mb
->cap_offset
, xa_idx
, vid
, prot
);
456 rc
= xa_insert(&doe_mb
->prots
, xa_idx
++,
457 pci_doe_xa_prot_entry(vid
, prot
), GFP_KERNEL
);
465 static void pci_doe_cancel_tasks(struct pci_doe_mb
*doe_mb
)
467 /* Stop all pending work items from starting */
468 set_bit(PCI_DOE_FLAG_DEAD
, &doe_mb
->flags
);
470 /* Cancel an in progress work item, if necessary */
471 set_bit(PCI_DOE_FLAG_CANCEL
, &doe_mb
->flags
);
472 wake_up(&doe_mb
->wq
);
476 * pci_doe_create_mb() - Create a DOE mailbox object
478 * @pdev: PCI device to create the DOE mailbox for
479 * @cap_offset: Offset of the DOE mailbox
481 * Create a single mailbox object to manage the mailbox protocol at the
482 * cap_offset specified.
484 * RETURNS: created mailbox object on success
485 * ERR_PTR(-errno) on failure
487 static struct pci_doe_mb
*pci_doe_create_mb(struct pci_dev
*pdev
,
490 struct pci_doe_mb
*doe_mb
;
493 doe_mb
= kzalloc(sizeof(*doe_mb
), GFP_KERNEL
);
495 return ERR_PTR(-ENOMEM
);
498 doe_mb
->cap_offset
= cap_offset
;
499 init_waitqueue_head(&doe_mb
->wq
);
500 xa_init(&doe_mb
->prots
);
502 doe_mb
->work_queue
= alloc_ordered_workqueue("%s %s DOE [%x]", 0,
503 dev_bus_name(&pdev
->dev
),
506 if (!doe_mb
->work_queue
) {
507 pci_err(pdev
, "[%x] failed to allocate work queue\n",
513 /* Reset the mailbox by issuing an abort */
514 rc
= pci_doe_abort(doe_mb
);
516 pci_err(pdev
, "[%x] failed to reset mailbox with abort command : %d\n",
517 doe_mb
->cap_offset
, rc
);
522 * The state machine and the mailbox should be in sync now;
523 * Use the mailbox to query protocols.
525 rc
= pci_doe_cache_protocols(doe_mb
);
527 pci_err(pdev
, "[%x] failed to cache protocols : %d\n",
528 doe_mb
->cap_offset
, rc
);
535 pci_doe_cancel_tasks(doe_mb
);
536 xa_destroy(&doe_mb
->prots
);
538 destroy_workqueue(doe_mb
->work_queue
);
545 * pci_doe_destroy_mb() - Destroy a DOE mailbox object
547 * @doe_mb: DOE mailbox
549 * Destroy all internal data structures created for the DOE mailbox.
551 static void pci_doe_destroy_mb(struct pci_doe_mb
*doe_mb
)
553 pci_doe_cancel_tasks(doe_mb
);
554 xa_destroy(&doe_mb
->prots
);
555 destroy_workqueue(doe_mb
->work_queue
);
560 * pci_doe_supports_prot() - Return if the DOE instance supports the given
562 * @doe_mb: DOE mailbox capability to query
563 * @vid: Protocol Vendor ID
564 * @type: Protocol type
566 * RETURNS: True if the DOE mailbox supports the protocol specified
568 static bool pci_doe_supports_prot(struct pci_doe_mb
*doe_mb
, u16 vid
, u8 type
)
573 /* The discovery protocol must always be supported */
574 if (vid
== PCI_VENDOR_ID_PCI_SIG
&& type
== PCI_DOE_PROTOCOL_DISCOVERY
)
577 xa_for_each(&doe_mb
->prots
, index
, entry
)
578 if (entry
== pci_doe_xa_prot_entry(vid
, type
))
585 * pci_doe_submit_task() - Submit a task to be processed by the state machine
587 * @doe_mb: DOE mailbox capability to submit to
588 * @task: task to be queued
590 * Submit a DOE task (request/response) to the DOE mailbox to be processed.
591 * Returns upon queueing the task object. If the queue is full this function
592 * will sleep until there is room in the queue.
594 * task->complete will be called when the state machine is done processing this
597 * @task must be allocated on the stack.
599 * Excess data will be discarded.
601 * RETURNS: 0 when task has been successfully queued, -ERRNO on error
603 static int pci_doe_submit_task(struct pci_doe_mb
*doe_mb
,
604 struct pci_doe_task
*task
)
606 if (!pci_doe_supports_prot(doe_mb
, task
->prot
.vid
, task
->prot
.type
))
609 if (test_bit(PCI_DOE_FLAG_DEAD
, &doe_mb
->flags
))
612 task
->doe_mb
= doe_mb
;
613 INIT_WORK_ONSTACK(&task
->work
, doe_statemachine_work
);
614 queue_work(doe_mb
->work_queue
, &task
->work
);
619 * pci_doe() - Perform Data Object Exchange
621 * @doe_mb: DOE Mailbox
623 * @type: Data Object Type
624 * @request: Request payload
625 * @request_sz: Size of request payload (bytes)
626 * @response: Response payload
627 * @response_sz: Size of response payload (bytes)
629 * Submit @request to @doe_mb and store the @response.
630 * The DOE exchange is performed synchronously and may therefore sleep.
632 * Payloads are treated as opaque byte streams which are transmitted verbatim,
633 * without byte-swapping. If payloads contain little-endian register values,
634 * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu().
636 * For convenience, arbitrary payload sizes are allowed even though PCIe r6.0
637 * sec 6.30.1 specifies the Data Object Header 2 "Length" in dwords. The last
638 * (partial) dword is copied with byte granularity and padded with zeroes if
639 * necessary. Callers are thus relieved of using dword-sized bounce buffers.
641 * RETURNS: Length of received response or negative errno.
642 * Received data in excess of @response_sz is discarded.
643 * The length may be smaller than @response_sz and the caller
644 * is responsible for checking that.
646 int pci_doe(struct pci_doe_mb
*doe_mb
, u16 vendor
, u8 type
,
647 const void *request
, size_t request_sz
,
648 void *response
, size_t response_sz
)
650 DECLARE_COMPLETION_ONSTACK(c
);
651 struct pci_doe_task task
= {
654 .request_pl
= request
,
655 .request_pl_sz
= request_sz
,
656 .response_pl
= response
,
657 .response_pl_sz
= response_sz
,
658 .complete
= pci_doe_task_complete
,
663 rc
= pci_doe_submit_task(doe_mb
, &task
);
667 wait_for_completion(&c
);
671 EXPORT_SYMBOL_GPL(pci_doe
);
674 * pci_find_doe_mailbox() - Find Data Object Exchange mailbox
678 * @type: Data Object Type
680 * Find first DOE mailbox of a PCI device which supports the given protocol.
682 * RETURNS: Pointer to the DOE mailbox or NULL if none was found.
684 struct pci_doe_mb
*pci_find_doe_mailbox(struct pci_dev
*pdev
, u16 vendor
,
687 struct pci_doe_mb
*doe_mb
;
690 xa_for_each(&pdev
->doe_mbs
, index
, doe_mb
)
691 if (pci_doe_supports_prot(doe_mb
, vendor
, type
))
696 EXPORT_SYMBOL_GPL(pci_find_doe_mailbox
);
698 void pci_doe_init(struct pci_dev
*pdev
)
700 struct pci_doe_mb
*doe_mb
;
704 xa_init(&pdev
->doe_mbs
);
706 while ((offset
= pci_find_next_ext_capability(pdev
, offset
,
707 PCI_EXT_CAP_ID_DOE
))) {
708 doe_mb
= pci_doe_create_mb(pdev
, offset
);
709 if (IS_ERR(doe_mb
)) {
710 pci_err(pdev
, "[%x] failed to create mailbox: %ld\n",
711 offset
, PTR_ERR(doe_mb
));
715 rc
= xa_insert(&pdev
->doe_mbs
, offset
, doe_mb
, GFP_KERNEL
);
717 pci_err(pdev
, "[%x] failed to insert mailbox: %d\n",
719 pci_doe_destroy_mb(doe_mb
);
724 void pci_doe_destroy(struct pci_dev
*pdev
)
726 struct pci_doe_mb
*doe_mb
;
729 xa_for_each(&pdev
->doe_mbs
, index
, doe_mb
)
730 pci_doe_destroy_mb(doe_mb
);
732 xa_destroy(&pdev
->doe_mbs
);
735 void pci_doe_disconnected(struct pci_dev
*pdev
)
737 struct pci_doe_mb
*doe_mb
;
740 xa_for_each(&pdev
->doe_mbs
, index
, doe_mb
)
741 pci_doe_cancel_tasks(doe_mb
);