2 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
3 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/interrupt.h>
36 #include <linux/dma-mapping.h>
38 #include <linux/mlx4/cmd.h>
/* Bytes reserved for the name string of each requested interrupt. */
enum {
	MLX4_IRQNAME_SIZE = 64
};

/* Event queue sizing. */
enum {
	MLX4_NUM_ASYNC_EQE = 256,	/* entries requested for the async EQ */
	MLX4_NUM_SPARE_EQE = 128,	/* extra entries added to every EQ */
	MLX4_EQ_ENTRY_SIZE = 32		/* size of one EQ entry, in bytes */
};
54 * Must be packed because start is 64 bits but only aligned to 32 bits.
56 struct mlx4_eq_context
{
70 __be32 mtt_base_addr_l
;
72 __be32 consumer_index
;
73 __be32 producer_index
;
/*
 * Values OR-ed together to build the 32-bit "flags" word of the EQ
 * context.  They are unsigned so that fields reaching bit 31
 * (10 << 28) never overflow a signed int.
 */
#define MLX4_EQ_STATUS_OK	   ( 0U << 28)
#define MLX4_EQ_STATUS_WRITE_FAIL  (10U << 28)
#define MLX4_EQ_OWNER_SW	   ( 0U << 24)
#define MLX4_EQ_OWNER_HW	   ( 1U << 24)
#define MLX4_EQ_FLAG_EC		   ( 1U << 18)
#define MLX4_EQ_FLAG_OI		   ( 1U << 17)
#define MLX4_EQ_STATE_ARMED	   ( 9U <<  8)
#define MLX4_EQ_STATE_FIRED	   (10U <<  8)
#define MLX4_EQ_STATE_ALWAYS_ARMED (11U <<  8)
/* Bit of a 64-bit event mask that selects one event type. */
#define MLX4_EVENT_BIT(type)	(1ull << (type))

/*
 * Set of event types that are mapped to (and unmapped from) the
 * asynchronous EQ with the MAP_EQ command.
 */
#define MLX4_ASYNC_EVENT_MASK						\
	(MLX4_EVENT_BIT(MLX4_EVENT_TYPE_PATH_MIG)		|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_COMM_EST)		|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_SQ_DRAINED)		|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_CQ_ERROR)		|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_WQ_CATAS_ERROR)		|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_EEC_CATAS_ERROR)	|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_PATH_MIG_FAILED)	|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR)	|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_WQ_ACCESS_ERROR)	|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_PORT_CHANGE)		|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_ECC_DETECT)		|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)	|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE)	|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_SRQ_LIMIT)		|	\
	 MLX4_EVENT_BIT(MLX4_EVENT_TYPE_CMD))
112 } __attribute__((packed
)) comp
;
120 } __attribute__((packed
)) cmd
;
123 } __attribute__((packed
)) qp
;
126 } __attribute__((packed
)) srq
;
132 } __attribute__((packed
)) cq_err
;
136 } __attribute__((packed
)) port_change
;
140 } __attribute__((packed
));
142 static void eq_set_ci(struct mlx4_eq
*eq
, int req_not
)
144 __raw_writel((__force u32
) cpu_to_be32((eq
->cons_index
& 0xffffff) |
147 /* We still want ordering, just not swabbing, so add a barrier */
151 static struct mlx4_eqe
*get_eqe(struct mlx4_eq
*eq
, u32 entry
)
153 unsigned long off
= (entry
& (eq
->nent
- 1)) * MLX4_EQ_ENTRY_SIZE
;
154 return eq
->page_list
[off
/ PAGE_SIZE
].buf
+ off
% PAGE_SIZE
;
157 static struct mlx4_eqe
*next_eqe_sw(struct mlx4_eq
*eq
)
159 struct mlx4_eqe
*eqe
= get_eqe(eq
, eq
->cons_index
);
160 return !!(eqe
->owner
& 0x80) ^ !!(eq
->cons_index
& eq
->nent
) ? NULL
: eqe
;
163 static int mlx4_eq_int(struct mlx4_dev
*dev
, struct mlx4_eq
*eq
)
165 struct mlx4_eqe
*eqe
;
171 while ((eqe
= next_eqe_sw(eq
))) {
173 * Make sure we read EQ entry contents after we've
174 * checked the ownership bit.
179 case MLX4_EVENT_TYPE_COMP
:
180 cqn
= be32_to_cpu(eqe
->event
.comp
.cqn
) & 0xffffff;
181 mlx4_cq_completion(dev
, cqn
);
184 case MLX4_EVENT_TYPE_PATH_MIG
:
185 case MLX4_EVENT_TYPE_COMM_EST
:
186 case MLX4_EVENT_TYPE_SQ_DRAINED
:
187 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE
:
188 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR
:
189 case MLX4_EVENT_TYPE_PATH_MIG_FAILED
:
190 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR
:
191 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR
:
192 mlx4_qp_event(dev
, be32_to_cpu(eqe
->event
.qp
.qpn
) & 0xffffff,
196 case MLX4_EVENT_TYPE_SRQ_LIMIT
:
197 case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR
:
198 mlx4_srq_event(dev
, be32_to_cpu(eqe
->event
.srq
.srqn
) & 0xffffff,
202 case MLX4_EVENT_TYPE_CMD
:
204 be16_to_cpu(eqe
->event
.cmd
.token
),
205 eqe
->event
.cmd
.status
,
206 be64_to_cpu(eqe
->event
.cmd
.out_param
));
209 case MLX4_EVENT_TYPE_PORT_CHANGE
:
210 port
= be32_to_cpu(eqe
->event
.port_change
.port
) >> 28;
211 if (eqe
->subtype
== MLX4_PORT_CHANGE_SUBTYPE_DOWN
) {
212 mlx4_dispatch_event(dev
, MLX4_DEV_EVENT_PORT_DOWN
,
214 mlx4_priv(dev
)->sense
.do_sense_port
[port
] = 1;
216 mlx4_dispatch_event(dev
, MLX4_DEV_EVENT_PORT_UP
,
218 mlx4_priv(dev
)->sense
.do_sense_port
[port
] = 0;
222 case MLX4_EVENT_TYPE_CQ_ERROR
:
223 mlx4_warn(dev
, "CQ %s on CQN %06x\n",
224 eqe
->event
.cq_err
.syndrome
== 1 ?
225 "overrun" : "access violation",
226 be32_to_cpu(eqe
->event
.cq_err
.cqn
) & 0xffffff);
227 mlx4_cq_event(dev
, be32_to_cpu(eqe
->event
.cq_err
.cqn
),
231 case MLX4_EVENT_TYPE_EQ_OVERFLOW
:
232 mlx4_warn(dev
, "EQ overrun on EQN %d\n", eq
->eqn
);
235 case MLX4_EVENT_TYPE_EEC_CATAS_ERROR
:
236 case MLX4_EVENT_TYPE_ECC_DETECT
:
238 mlx4_warn(dev
, "Unhandled event %02x(%02x) on EQ %d at index %u\n",
239 eqe
->type
, eqe
->subtype
, eq
->eqn
, eq
->cons_index
);
248 * The HCA will think the queue has overflowed if we
249 * don't tell it we've been processing events. We
250 * create our EQs with MLX4_NUM_SPARE_EQE extra
251 * entries, so we must update our consumer index at
254 if (unlikely(set_ci
>= MLX4_NUM_SPARE_EQE
)) {
265 static irqreturn_t
mlx4_interrupt(int irq
, void *dev_ptr
)
267 struct mlx4_dev
*dev
= dev_ptr
;
268 struct mlx4_priv
*priv
= mlx4_priv(dev
);
272 writel(priv
->eq_table
.clr_mask
, priv
->eq_table
.clr_int
);
274 for (i
= 0; i
< dev
->caps
.num_comp_vectors
+ 1; ++i
)
275 work
|= mlx4_eq_int(dev
, &priv
->eq_table
.eq
[i
]);
277 return IRQ_RETVAL(work
);
280 static irqreturn_t
mlx4_msi_x_interrupt(int irq
, void *eq_ptr
)
282 struct mlx4_eq
*eq
= eq_ptr
;
283 struct mlx4_dev
*dev
= eq
->dev
;
285 mlx4_eq_int(dev
, eq
);
287 /* MSI-X vectors always belong to us */
291 static int mlx4_MAP_EQ(struct mlx4_dev
*dev
, u64 event_mask
, int unmap
,
294 return mlx4_cmd(dev
, event_mask
, (unmap
<< 31) | eq_num
,
295 0, MLX4_CMD_MAP_EQ
, MLX4_CMD_TIME_CLASS_B
);
298 static int mlx4_SW2HW_EQ(struct mlx4_dev
*dev
, struct mlx4_cmd_mailbox
*mailbox
,
301 return mlx4_cmd(dev
, mailbox
->dma
, eq_num
, 0, MLX4_CMD_SW2HW_EQ
,
302 MLX4_CMD_TIME_CLASS_A
);
305 static int mlx4_HW2SW_EQ(struct mlx4_dev
*dev
, struct mlx4_cmd_mailbox
*mailbox
,
308 return mlx4_cmd_box(dev
, 0, mailbox
->dma
, eq_num
, 0, MLX4_CMD_HW2SW_EQ
,
309 MLX4_CMD_TIME_CLASS_A
);
312 static int mlx4_num_eq_uar(struct mlx4_dev
*dev
)
315 * Each UAR holds 4 EQ doorbells. To figure out how many UARs
316 * we need to map, take the difference of highest index and
317 * the lowest index we'll use and add 1.
319 return (dev
->caps
.num_comp_vectors
+ 1 + dev
->caps
.reserved_eqs
) / 4 -
320 dev
->caps
.reserved_eqs
/ 4 + 1;
323 static void __iomem
*mlx4_get_eq_uar(struct mlx4_dev
*dev
, struct mlx4_eq
*eq
)
325 struct mlx4_priv
*priv
= mlx4_priv(dev
);
328 index
= eq
->eqn
/ 4 - dev
->caps
.reserved_eqs
/ 4;
330 if (!priv
->eq_table
.uar_map
[index
]) {
331 priv
->eq_table
.uar_map
[index
] =
332 ioremap(pci_resource_start(dev
->pdev
, 2) +
333 ((eq
->eqn
/ 4) << PAGE_SHIFT
),
335 if (!priv
->eq_table
.uar_map
[index
]) {
336 mlx4_err(dev
, "Couldn't map EQ doorbell for EQN 0x%06x\n",
342 return priv
->eq_table
.uar_map
[index
] + 0x800 + 8 * (eq
->eqn
% 4);
345 static int mlx4_create_eq(struct mlx4_dev
*dev
, int nent
,
346 u8 intr
, struct mlx4_eq
*eq
)
348 struct mlx4_priv
*priv
= mlx4_priv(dev
);
349 struct mlx4_cmd_mailbox
*mailbox
;
350 struct mlx4_eq_context
*eq_context
;
352 u64
*dma_list
= NULL
;
359 eq
->nent
= roundup_pow_of_two(max(nent
, 2));
360 npages
= PAGE_ALIGN(eq
->nent
* MLX4_EQ_ENTRY_SIZE
) / PAGE_SIZE
;
362 eq
->page_list
= kmalloc(npages
* sizeof *eq
->page_list
,
367 for (i
= 0; i
< npages
; ++i
)
368 eq
->page_list
[i
].buf
= NULL
;
370 dma_list
= kmalloc(npages
* sizeof *dma_list
, GFP_KERNEL
);
374 mailbox
= mlx4_alloc_cmd_mailbox(dev
);
377 eq_context
= mailbox
->buf
;
379 for (i
= 0; i
< npages
; ++i
) {
380 eq
->page_list
[i
].buf
= dma_alloc_coherent(&dev
->pdev
->dev
,
381 PAGE_SIZE
, &t
, GFP_KERNEL
);
382 if (!eq
->page_list
[i
].buf
)
383 goto err_out_free_pages
;
386 eq
->page_list
[i
].map
= t
;
388 memset(eq
->page_list
[i
].buf
, 0, PAGE_SIZE
);
391 eq
->eqn
= mlx4_bitmap_alloc(&priv
->eq_table
.bitmap
);
393 goto err_out_free_pages
;
395 eq
->doorbell
= mlx4_get_eq_uar(dev
, eq
);
398 goto err_out_free_eq
;
401 err
= mlx4_mtt_init(dev
, npages
, PAGE_SHIFT
, &eq
->mtt
);
403 goto err_out_free_eq
;
405 err
= mlx4_write_mtt(dev
, &eq
->mtt
, 0, npages
, dma_list
);
407 goto err_out_free_mtt
;
409 memset(eq_context
, 0, sizeof *eq_context
);
410 eq_context
->flags
= cpu_to_be32(MLX4_EQ_STATUS_OK
|
411 MLX4_EQ_STATE_ARMED
);
412 eq_context
->log_eq_size
= ilog2(eq
->nent
);
413 eq_context
->intr
= intr
;
414 eq_context
->log_page_size
= PAGE_SHIFT
- MLX4_ICM_PAGE_SHIFT
;
416 mtt_addr
= mlx4_mtt_addr(dev
, &eq
->mtt
);
417 eq_context
->mtt_base_addr_h
= mtt_addr
>> 32;
418 eq_context
->mtt_base_addr_l
= cpu_to_be32(mtt_addr
& 0xffffffff);
420 err
= mlx4_SW2HW_EQ(dev
, mailbox
, eq
->eqn
);
422 mlx4_warn(dev
, "SW2HW_EQ failed (%d)\n", err
);
423 goto err_out_free_mtt
;
427 mlx4_free_cmd_mailbox(dev
, mailbox
);
434 mlx4_mtt_cleanup(dev
, &eq
->mtt
);
437 mlx4_bitmap_free(&priv
->eq_table
.bitmap
, eq
->eqn
);
440 for (i
= 0; i
< npages
; ++i
)
441 if (eq
->page_list
[i
].buf
)
442 dma_free_coherent(&dev
->pdev
->dev
, PAGE_SIZE
,
443 eq
->page_list
[i
].buf
,
444 eq
->page_list
[i
].map
);
446 mlx4_free_cmd_mailbox(dev
, mailbox
);
449 kfree(eq
->page_list
);
456 static void mlx4_free_eq(struct mlx4_dev
*dev
,
459 struct mlx4_priv
*priv
= mlx4_priv(dev
);
460 struct mlx4_cmd_mailbox
*mailbox
;
462 int npages
= PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE
* eq
->nent
) / PAGE_SIZE
;
465 mailbox
= mlx4_alloc_cmd_mailbox(dev
);
469 err
= mlx4_HW2SW_EQ(dev
, mailbox
, eq
->eqn
);
471 mlx4_warn(dev
, "HW2SW_EQ failed (%d)\n", err
);
474 mlx4_dbg(dev
, "Dumping EQ context %02x:\n", eq
->eqn
);
475 for (i
= 0; i
< sizeof (struct mlx4_eq_context
) / 4; ++i
) {
477 printk("[%02x] ", i
* 4);
478 printk(" %08x", be32_to_cpup(mailbox
->buf
+ i
* 4));
479 if ((i
+ 1) % 4 == 0)
484 mlx4_mtt_cleanup(dev
, &eq
->mtt
);
485 for (i
= 0; i
< npages
; ++i
)
486 pci_free_consistent(dev
->pdev
, PAGE_SIZE
,
487 eq
->page_list
[i
].buf
,
488 eq
->page_list
[i
].map
);
490 kfree(eq
->page_list
);
491 mlx4_bitmap_free(&priv
->eq_table
.bitmap
, eq
->eqn
);
492 mlx4_free_cmd_mailbox(dev
, mailbox
);
495 static void mlx4_free_irqs(struct mlx4_dev
*dev
)
497 struct mlx4_eq_table
*eq_table
= &mlx4_priv(dev
)->eq_table
;
500 if (eq_table
->have_irq
)
501 free_irq(dev
->pdev
->irq
, dev
);
502 for (i
= 0; i
< dev
->caps
.num_comp_vectors
+ 1; ++i
)
503 if (eq_table
->eq
[i
].have_irq
) {
504 free_irq(eq_table
->eq
[i
].irq
, eq_table
->eq
+ i
);
505 eq_table
->eq
[i
].have_irq
= 0;
508 kfree(eq_table
->irq_names
);
511 static int mlx4_map_clr_int(struct mlx4_dev
*dev
)
513 struct mlx4_priv
*priv
= mlx4_priv(dev
);
515 priv
->clr_base
= ioremap(pci_resource_start(dev
->pdev
, priv
->fw
.clr_int_bar
) +
516 priv
->fw
.clr_int_base
, MLX4_CLR_INT_SIZE
);
517 if (!priv
->clr_base
) {
518 mlx4_err(dev
, "Couldn't map interrupt clear register, aborting.\n");
525 static void mlx4_unmap_clr_int(struct mlx4_dev
*dev
)
527 struct mlx4_priv
*priv
= mlx4_priv(dev
);
529 iounmap(priv
->clr_base
);
532 int mlx4_alloc_eq_table(struct mlx4_dev
*dev
)
534 struct mlx4_priv
*priv
= mlx4_priv(dev
);
536 priv
->eq_table
.eq
= kcalloc(dev
->caps
.num_eqs
- dev
->caps
.reserved_eqs
,
537 sizeof *priv
->eq_table
.eq
, GFP_KERNEL
);
538 if (!priv
->eq_table
.eq
)
544 void mlx4_free_eq_table(struct mlx4_dev
*dev
)
546 kfree(mlx4_priv(dev
)->eq_table
.eq
);
549 int mlx4_init_eq_table(struct mlx4_dev
*dev
)
551 struct mlx4_priv
*priv
= mlx4_priv(dev
);
555 priv
->eq_table
.uar_map
= kcalloc(sizeof *priv
->eq_table
.uar_map
,
556 mlx4_num_eq_uar(dev
), GFP_KERNEL
);
557 if (!priv
->eq_table
.uar_map
) {
562 err
= mlx4_bitmap_init(&priv
->eq_table
.bitmap
, dev
->caps
.num_eqs
,
563 dev
->caps
.num_eqs
- 1, dev
->caps
.reserved_eqs
, 0);
567 for (i
= 0; i
< mlx4_num_eq_uar(dev
); ++i
)
568 priv
->eq_table
.uar_map
[i
] = NULL
;
570 err
= mlx4_map_clr_int(dev
);
574 priv
->eq_table
.clr_mask
=
575 swab32(1 << (priv
->eq_table
.inta_pin
& 31));
576 priv
->eq_table
.clr_int
= priv
->clr_base
+
577 (priv
->eq_table
.inta_pin
< 32 ? 4 : 0);
579 priv
->eq_table
.irq_names
=
580 kmalloc(MLX4_IRQNAME_SIZE
* (dev
->caps
.num_comp_vectors
+ 1),
582 if (!priv
->eq_table
.irq_names
) {
587 for (i
= 0; i
< dev
->caps
.num_comp_vectors
; ++i
) {
588 err
= mlx4_create_eq(dev
, dev
->caps
.num_cqs
+ MLX4_NUM_SPARE_EQE
,
589 (dev
->flags
& MLX4_FLAG_MSI_X
) ? i
: 0,
590 &priv
->eq_table
.eq
[i
]);
597 err
= mlx4_create_eq(dev
, MLX4_NUM_ASYNC_EQE
+ MLX4_NUM_SPARE_EQE
,
598 (dev
->flags
& MLX4_FLAG_MSI_X
) ? dev
->caps
.num_comp_vectors
: 0,
599 &priv
->eq_table
.eq
[dev
->caps
.num_comp_vectors
]);
603 if (dev
->flags
& MLX4_FLAG_MSI_X
) {
606 for (i
= 0; i
< dev
->caps
.num_comp_vectors
+ 1; ++i
) {
607 if (i
< dev
->caps
.num_comp_vectors
) {
608 snprintf(priv
->eq_table
.irq_names
+
609 i
* MLX4_IRQNAME_SIZE
,
611 "mlx4-comp-%d@pci:%s", i
,
612 pci_name(dev
->pdev
));
614 snprintf(priv
->eq_table
.irq_names
+
615 i
* MLX4_IRQNAME_SIZE
,
618 pci_name(dev
->pdev
));
621 eq_name
= priv
->eq_table
.irq_names
+
622 i
* MLX4_IRQNAME_SIZE
;
623 err
= request_irq(priv
->eq_table
.eq
[i
].irq
,
624 mlx4_msi_x_interrupt
, 0, eq_name
,
625 priv
->eq_table
.eq
+ i
);
629 priv
->eq_table
.eq
[i
].have_irq
= 1;
632 snprintf(priv
->eq_table
.irq_names
,
635 pci_name(dev
->pdev
));
636 err
= request_irq(dev
->pdev
->irq
, mlx4_interrupt
,
637 IRQF_SHARED
, priv
->eq_table
.irq_names
, dev
);
641 priv
->eq_table
.have_irq
= 1;
644 err
= mlx4_MAP_EQ(dev
, MLX4_ASYNC_EVENT_MASK
, 0,
645 priv
->eq_table
.eq
[dev
->caps
.num_comp_vectors
].eqn
);
647 mlx4_warn(dev
, "MAP_EQ for async EQ %d failed (%d)\n",
648 priv
->eq_table
.eq
[dev
->caps
.num_comp_vectors
].eqn
, err
);
650 for (i
= 0; i
< dev
->caps
.num_comp_vectors
+ 1; ++i
)
651 eq_set_ci(&priv
->eq_table
.eq
[i
], 1);
656 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[dev
->caps
.num_comp_vectors
]);
659 i
= dev
->caps
.num_comp_vectors
- 1;
663 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[i
]);
666 mlx4_unmap_clr_int(dev
);
670 mlx4_bitmap_cleanup(&priv
->eq_table
.bitmap
);
673 kfree(priv
->eq_table
.uar_map
);
678 void mlx4_cleanup_eq_table(struct mlx4_dev
*dev
)
680 struct mlx4_priv
*priv
= mlx4_priv(dev
);
683 mlx4_MAP_EQ(dev
, MLX4_ASYNC_EVENT_MASK
, 1,
684 priv
->eq_table
.eq
[dev
->caps
.num_comp_vectors
].eqn
);
688 for (i
= 0; i
< dev
->caps
.num_comp_vectors
+ 1; ++i
)
689 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[i
]);
691 mlx4_unmap_clr_int(dev
);
693 for (i
= 0; i
< mlx4_num_eq_uar(dev
); ++i
)
694 if (priv
->eq_table
.uar_map
[i
])
695 iounmap(priv
->eq_table
.uar_map
[i
]);
697 mlx4_bitmap_cleanup(&priv
->eq_table
.bitmap
);
699 kfree(priv
->eq_table
.uar_map
);