/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  Functions for EQs, NEQs and interrupts
 *
 *  Authors: Heiko J Schick <schickhj@de.ibm.com>
 *           Khadija Souissi <souissi@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
#include "ehca_tools.h"
#include "hcp_if.h"
#include "hipz_fns.h"

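/*
 * Event-queue entry (EQE/NEQE) field masks. EHCA_BMASK_IBM(from,to) uses
 * IBM bit numbering: bit 0 is the most significant bit of the 64-bit
 * entry, and EHCA_BMASK_GET() returns the selected field right-justified.
 * Hypothetical example, assuming that numbering: for
 * eqe = 0x4000000000000042ULL, bit 1 is set, so
 * EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe) == 1 and
 * EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe) == 0x42.
 */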
#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1,1)
#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8,31)
#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2,7)
#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8,31)
#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8,31)
#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32,63)
#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32,63)

#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1,1)
#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2,7)
#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8,15)
#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)

#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
#define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)

#ifdef CONFIG_INFINIBAND_EHCA_SCALING

static void queue_comp_task(struct ehca_cq *__cq);

static struct ehca_comp_pool* pool;
static struct notifier_block comp_pool_callback_nb;

#endif
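
/*
 * Call the consumer's completion handler for @cq, if one is registered.
 * cq->cb_lock serializes invocations of the handler for this CQ.
 */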
static inline void comp_event_callback(struct ehca_cq *cq)
{
        if (!cq->ib_cq.comp_handler)
                return;

        spin_lock(&cq->cb_lock);
        cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
        spin_unlock(&cq->cb_lock);

        return;
}
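
/*
 * Log a fetched error-data block. rblock[2] carries the resource type
 * (0x1 = QP, 0x4 = CQ) and rblock[1] the resource handle; the raw block
 * is hex-dumped via ehca_dmp().
 */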
static void print_error_data(struct ehca_shca *shca, void* data,
                             u64* rblock, int length)
{
        u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
        u64 resource = rblock[1];

        switch (type) {
        case 0x1: /* Queue Pair */
        {
                struct ehca_qp *qp = (struct ehca_qp*)data;

                /* only print error data if AER is set */
                if (rblock[6] == 0)
                        return;

                ehca_err(&shca->ib_device,
                         "QP 0x%x (resource=%lx) has errors.",
                         qp->ib_qp.qp_num, resource);
                break;
        }
        case 0x4: /* Completion Queue */
        {
                struct ehca_cq *cq = (struct ehca_cq*)data;

                ehca_err(&shca->ib_device,
                         "CQ 0x%x (resource=%lx) has errors.",
                         cq->cq_number, resource);
                break;
        }
        default:
                ehca_err(&shca->ib_device,
                         "Unknown error type: %lx on %s.",
                         type, shca->ib_device.name);
                break;
        }

        ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
        ehca_err(&shca->ib_device, "EHCA ----- error data begin "
                 "---------------------------------------------------");
        ehca_dmp(rblock, length, "resource=%lx", resource);
        ehca_err(&shca->ib_device, "EHCA ----- error data end "
                 "----------------------------------------------------");

        return;
}
int ehca_error_data(struct ehca_shca *shca, void *data,
                    u64 resource)
{
        unsigned long ret;
        u64 *rblock;
        unsigned long block_count;

        rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
        if (!rblock) {
                ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
                ret = -ENOMEM;
                goto error_data1;
        }

        ret = hipz_h_error_data(shca->ipz_hca_handle,
                                resource,
                                rblock,
                                &block_count);

        if (ret == H_R_STATE) {
                ehca_err(&shca->ib_device,
                         "No error data is available: %lx.", resource);
        } else if (ret == H_SUCCESS) {
                int length;

                length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);

                if (length > PAGE_SIZE)
                        length = PAGE_SIZE;

                print_error_data(shca, data, rblock, length);
        } else {
                ehca_err(&shca->ib_device,
                         "Error data could not be fetched: %lx", resource);
        }

        kfree(rblock);

error_data1:
        return ret;
}
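
/*
 * Affiliated QP events: the EQE carries the QP token, which is resolved
 * to the ehca_qp via the QP idr under ehca_qp_idr_lock. Error data is
 * fetched first, then the event is delivered through the consumer's
 * event handler, if any.
 */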
static void qp_event_callback(struct ehca_shca *shca,
                              u64 eqe,
                              enum ib_event_type event_type)
{
        struct ib_event event;
        struct ehca_qp *qp;
        unsigned long flags;
        u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);

        spin_lock_irqsave(&ehca_qp_idr_lock, flags);
        qp = idr_find(&ehca_qp_idr, token);
        spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);

        if (!qp)
                return;

        ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);

        if (!qp->ib_qp.event_handler)
                return;

        event.device     = &shca->ib_device;
        event.event      = event_type;
        event.element.qp = &qp->ib_qp;

        qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);

        return;
}

static void cq_event_callback(struct ehca_shca *shca,
                              u64 eqe)
{
        struct ehca_cq *cq;
        unsigned long flags;
        u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);

        spin_lock_irqsave(&ehca_cq_idr_lock, flags);
        cq = idr_find(&ehca_cq_idr, token);
        spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);

        if (!cq)
                return;

        ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);

        return;
}
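
/*
 * Decode the EE identifier field of an EQ entry and dispatch affiliated
 * events to the QP/CQ callbacks above; everything else is only logged.
 */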
static void parse_identifier(struct ehca_shca *shca, u64 eqe)
{
        u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);

        switch (identifier) {
        case 0x02: /* path migrated */
                qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
                break;
        case 0x03: /* communication established */
                qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
                break;
        case 0x04: /* send queue drained */
                qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
                break;
        case 0x05: /* QP error */
        case 0x06: /* QP error */
                qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
                break;
        case 0x07: /* CQ error */
        case 0x08: /* CQ error */
                cq_event_callback(shca, eqe);
                break;
        case 0x09: /* MRMWPTE error */
                ehca_err(&shca->ib_device, "MRMWPTE error.");
                break;
        case 0x0A: /* port event */
                ehca_err(&shca->ib_device, "Port event.");
                break;
        case 0x0B: /* MR access error */
                ehca_err(&shca->ib_device, "MR access error.");
                break;
        case 0x0C: /* EQ error */
                ehca_err(&shca->ib_device, "EQ error.");
                break;
        case 0x0D: /* P/Q_Key mismatch */
                ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
                break;
        case 0x10: /* sampling complete */
                ehca_err(&shca->ib_device, "Sampling complete.");
                break;
        case 0x11: /* unaffiliated access error */
                ehca_err(&shca->ib_device, "Unaffiliated access error.");
                break;
        case 0x12: /* path migration error */
                ehca_err(&shca->ib_device, "Path migration error.");
                break;
        case 0x13: /* interface trace stopped */
                ehca_err(&shca->ib_device, "Interface trace stopped.");
                break;
        case 0x14: /* first error capture info available */
        default:
                ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
                         identifier, shca->ib_device.name);
                break;
        }

        return;
}
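
/*
 * Decode a notification event (NEQ) entry: event code 0x30 signals a
 * port availability change, 0x31 a disruptive port configuration change
 * (LID, PKEY or SM change), which is reported to consumers as PORT_ERR
 * followed by PORT_ACTIVE.
 */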
static void parse_ec(struct ehca_shca *shca, u64 eqe)
{
        struct ib_event event;
        u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
        u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);

        switch (ec) {
        case 0x30: /* port availability change */
                if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
                        ehca_info(&shca->ib_device,
                                  "port %x is active.", port);
                        event.device = &shca->ib_device;
                        event.event = IB_EVENT_PORT_ACTIVE;
                        event.element.port_num = port;
                        shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
                        ib_dispatch_event(&event);
                } else {
                        ehca_info(&shca->ib_device,
                                  "port %x is inactive.", port);
                        event.device = &shca->ib_device;
                        event.event = IB_EVENT_PORT_ERR;
                        event.element.port_num = port;
                        shca->sport[port - 1].port_state = IB_PORT_DOWN;
                        ib_dispatch_event(&event);
                }
                break;
        case 0x31:
                /* port configuration change
                 * disruptive change is caused by
                 * LID, PKEY or SM change
                 */
                ehca_warn(&shca->ib_device,
                          "disruptive port %x configuration change", port);

                ehca_info(&shca->ib_device,
                          "port %x is inactive.", port);
                event.device = &shca->ib_device;
                event.event = IB_EVENT_PORT_ERR;
                event.element.port_num = port;
                shca->sport[port - 1].port_state = IB_PORT_DOWN;
                ib_dispatch_event(&event);

                ehca_info(&shca->ib_device,
                          "port %x is active.", port);
                event.device = &shca->ib_device;
                event.event = IB_EVENT_PORT_ACTIVE;
                event.element.port_num = port;
                shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
                ib_dispatch_event(&event);
                break;
        case 0x32: /* adapter malfunction */
                ehca_err(&shca->ib_device, "Adapter malfunction.");
                break;
        case 0x33: /* trace stopped */
                ehca_err(&shca->ib_device, "Trace stopped.");
                break;
        default:
                ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
                         ec, shca->ib_device.name);
                break;
        }

        return;
}
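
/*
 * Clear the EQ-pending bit in the CQ's memory-mapped registers via its
 * kernel galpa. The load of CQx_EP afterwards is presumably there to
 * flush the MMIO store; its value is not used.
 */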
static inline void reset_eq_pending(struct ehca_cq *cq)
{
        u64 CQx_EP;
        struct h_galpa gal = cq->galpas.kernel;

        hipz_galpa_store_cq(gal, cqx_ep, 0x0);
        CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));

        return;
}
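
/*
 * Interrupt handlers only schedule the corresponding tasklet; all EQ/NEQ
 * processing happens in the tasklet (bottom-half) context below.
 */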
irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs)
{
        struct ehca_shca *shca = (struct ehca_shca*)dev_id;

        tasklet_hi_schedule(&shca->neq.interrupt_task);

        return IRQ_HANDLED;
}

void ehca_tasklet_neq(unsigned long data)
{
        struct ehca_shca *shca = (struct ehca_shca*)data;
        struct ehca_eqe *eqe;
        u64 ret;

        eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);

        while (eqe) {
                if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
                        parse_ec(shca, eqe->entry);

                eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
        }

        ret = hipz_h_reset_event(shca->ipz_hca_handle,
                                 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);

        if (ret != H_SUCCESS)
                ehca_err(&shca->ib_device, "Can't clear notification events.");

        return;
}

irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs)
{
        struct ehca_shca *shca = (struct ehca_shca*)dev_id;

        tasklet_hi_schedule(&shca->eq.interrupt_task);

        return IRQ_HANDLED;
}
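
/*
 * EQ tasklet: drain the event queue, dispatching completion events to
 * the CQ callbacks and everything else to parse_identifier(). On
 * hw_level >= 2 the hardware interrupt state is re-queried so events
 * arriving while the tasklet runs are not lost; query_cnt bounds the
 * number of consecutive re-checks.
 */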
void ehca_tasklet_eq(unsigned long data)
{
        struct ehca_shca *shca = (struct ehca_shca*)data;
        struct ehca_eqe *eqe;
        int int_state;
        int query_cnt = 0;

        do {
                eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);

                if ((shca->hw_level >= 2) && eqe)
                        int_state = 1;
                else
                        int_state = 0;

                while ((int_state == 1) || eqe) {
                        while (eqe) {
                                u64 eqe_value = eqe->entry;

                                ehca_dbg(&shca->ib_device,
                                         "eqe_value=%lx", eqe_value);

                                /* TODO: better structure */
                                if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
                                                   eqe_value)) {
                                        unsigned long flags;
                                        u32 token;
                                        struct ehca_cq *cq;

                                        ehca_dbg(&shca->ib_device,
                                                 "... completion event");
                                        token =
                                                EHCA_BMASK_GET(EQE_CQ_TOKEN,
                                                               eqe_value);
                                        spin_lock_irqsave(&ehca_cq_idr_lock,
                                                          flags);
                                        cq = idr_find(&ehca_cq_idr, token);

                                        if (cq == NULL) {
                                                spin_unlock_irqrestore(
                                                        &ehca_cq_idr_lock,
                                                        flags);
                                                break;
                                        }

                                        reset_eq_pending(cq);
#ifdef CONFIG_INFINIBAND_EHCA_SCALING
                                        queue_comp_task(cq);
                                        spin_unlock_irqrestore(&ehca_cq_idr_lock,
                                                               flags);
#else
                                        spin_unlock_irqrestore(&ehca_cq_idr_lock,
                                                               flags);
                                        comp_event_callback(cq);
#endif
                                } else {
                                        ehca_dbg(&shca->ib_device,
                                                 "... non completion event");
                                        parse_identifier(shca, eqe_value);
                                }

                                eqe = (struct ehca_eqe *)ehca_poll_eq(shca,
                                                                      &shca->eq);
                        }

                        if (shca->hw_level >= 2) {
                                int_state =
                                        hipz_h_query_int_state(shca->ipz_hca_handle,
                                                               shca->eq.ist);
                                query_cnt++;
                                if (query_cnt >= 100) {
                                        query_cnt = 0;
                                        int_state = 0;
                                }
                        }
                        eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);

                }
        } while (int_state != 0);

        return;
}
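
/*
 * CONFIG_INFINIBAND_EHCA_SCALING: completion work is spread across a
 * pool of per-CPU kernel threads ("ehca_comp/N"). CQs with pending
 * callbacks are queued round-robin onto an online CPU's task list, and
 * a CPU-hotplug notifier migrates queued work away from dying CPUs.
 */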

#ifdef CONFIG_INFINIBAND_EHCA_SCALING

static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
{
        unsigned long flags_last_cpu;

        if (ehca_debug_level)
                ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");

        spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
        pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
        if (pool->last_cpu == NR_CPUS)
                pool->last_cpu = first_cpu(cpu_online_map);
        spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);

        return pool->last_cpu;
}

static void __queue_comp_task(struct ehca_cq *__cq,
                              struct ehca_cpu_comp_task *cct)
{
        unsigned long flags_cct;
        unsigned long flags_cq;

        spin_lock_irqsave(&cct->task_lock, flags_cct);
        spin_lock_irqsave(&__cq->task_lock, flags_cq);

        if (__cq->nr_callbacks == 0) {
                __cq->nr_callbacks++;
                list_add_tail(&__cq->entry, &cct->cq_list);
                cct->cq_jobs++;
                wake_up(&cct->wait_queue);
        } else
                __cq->nr_callbacks++;

        spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
        spin_unlock_irqrestore(&cct->task_lock, flags_cct);
}
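
/*
 * Pick a target CPU round-robin; if that CPU's task already has jobs
 * queued, advance once more before queueing. This spreads CQ callbacks
 * across the pool without guaranteeing an idle CPU.
 */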
static void queue_comp_task(struct ehca_cq *__cq)
{
        int cpu_id;
        struct ehca_cpu_comp_task *cct;

        cpu_id = find_next_online_cpu(pool);
        BUG_ON(!cpu_online(cpu_id));

        cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);

        if (cct->cq_jobs > 0) {
                cpu_id = find_next_online_cpu(pool);
                cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
        }

        __queue_comp_task(__cq, cct);
}

static void run_comp_task(struct ehca_cpu_comp_task* cct)
{
        struct ehca_cq *cq;
        unsigned long flags_cct;
        unsigned long flags_cq;

        spin_lock_irqsave(&cct->task_lock, flags_cct);

        while (!list_empty(&cct->cq_list)) {
                cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
                spin_unlock_irqrestore(&cct->task_lock, flags_cct);
                comp_event_callback(cq);
                spin_lock_irqsave(&cct->task_lock, flags_cct);

                spin_lock_irqsave(&cq->task_lock, flags_cq);
                cq->nr_callbacks--;
                if (cq->nr_callbacks == 0) {
                        list_del_init(cct->cq_list.next);
                        cct->cq_jobs--;
                }
                spin_unlock_irqrestore(&cq->task_lock, flags_cq);
        }

        spin_unlock_irqrestore(&cct->task_lock, flags_cct);
}
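
/*
 * Per-CPU completion thread: sleep on the wait queue while the CQ list
 * is empty, otherwise run the queued callbacks. The wait-queue entry is
 * added before the emptiness check so a concurrent wake_up() from
 * __queue_comp_task() cannot be missed.
 */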
static int comp_task(void *__cct)
{
        struct ehca_cpu_comp_task* cct = __cct;
        DECLARE_WAITQUEUE(wait, current);

        set_current_state(TASK_INTERRUPTIBLE);
        while(!kthread_should_stop()) {
                add_wait_queue(&cct->wait_queue, &wait);

                if (list_empty(&cct->cq_list))
                        schedule();
                else
                        __set_current_state(TASK_RUNNING);

                remove_wait_queue(&cct->wait_queue, &wait);

                if (!list_empty(&cct->cq_list))
                        run_comp_task(__cct);

                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);

        return 0;
}

static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
                                            int cpu)
{
        struct ehca_cpu_comp_task *cct;

        cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
        spin_lock_init(&cct->task_lock);
        INIT_LIST_HEAD(&cct->cq_list);
        init_waitqueue_head(&cct->wait_queue);
        cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);

        return cct->task;
}

static void destroy_comp_task(struct ehca_comp_pool *pool,
                              int cpu)
{
        struct ehca_cpu_comp_task *cct;
        struct task_struct *task;
        unsigned long flags_cct;

        cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);

        spin_lock_irqsave(&cct->task_lock, flags_cct);

        task = cct->task;
        cct->task = NULL;
        cct->cq_jobs = 0;

        spin_unlock_irqrestore(&cct->task_lock, flags_cct);

        if (task)
                kthread_stop(task);
}

static void take_over_work(struct ehca_comp_pool *pool,
                           int cpu)
{
        struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
        LIST_HEAD(list);
        struct ehca_cq *cq;
        unsigned long flags_cct;

        spin_lock_irqsave(&cct->task_lock, flags_cct);

        list_splice_init(&cct->cq_list, &list);

        while (!list_empty(&list)) {
                /* walk the spliced-off list; cct->cq_list is empty now */
                cq = list_entry(list.next, struct ehca_cq, entry);

                list_del(&cq->entry);
                __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
                                                  smp_processor_id()));
        }

        spin_unlock_irqrestore(&cct->task_lock, flags_cct);
}
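
/*
 * CPU hotplug notifier: create/bind a completion thread when a CPU comes
 * up, and on CPU death destroy its thread and requeue its pending CQs on
 * the CPU running the notifier via take_over_work().
 */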
static int comp_pool_callback(struct notifier_block *nfb,
                              unsigned long action,
                              void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;
        struct ehca_cpu_comp_task *cct;

        switch (action) {
        case CPU_UP_PREPARE:
                ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
                if (!create_comp_task(pool, cpu)) {
                        ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
                        return NOTIFY_BAD;
                }
                break;
        case CPU_UP_CANCELED:
                ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
                cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
                kthread_bind(cct->task, any_online_cpu(cpu_online_map));
                destroy_comp_task(pool, cpu);
                break;
        case CPU_ONLINE:
                ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
                cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
                kthread_bind(cct->task, cpu);
                wake_up_process(cct->task);
                break;
        case CPU_DOWN_PREPARE:
                ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
                break;
        case CPU_DOWN_FAILED:
                ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
                break;
        case CPU_DEAD:
                ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
                destroy_comp_task(pool, cpu);
                take_over_work(pool, cpu);
                break;
        }

        return NOTIFY_OK;
}

#endif

int ehca_create_comp_pool(void)
{
#ifdef CONFIG_INFINIBAND_EHCA_SCALING
        int cpu;
        struct task_struct *task;

        pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
        if (pool == NULL)
                return -ENOMEM;

        spin_lock_init(&pool->last_cpu_lock);
        pool->last_cpu = any_online_cpu(cpu_online_map);

        pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
        if (pool->cpu_comp_tasks == NULL) {
                kfree(pool);
                return -ENOMEM;
        }

        for_each_online_cpu(cpu) {
                task = create_comp_task(pool, cpu);
                if (task) {
                        kthread_bind(task, cpu);
                        wake_up_process(task);
                }
        }

        comp_pool_callback_nb.notifier_call = comp_pool_callback;
        comp_pool_callback_nb.priority = 0;
        register_cpu_notifier(&comp_pool_callback_nb);
#endif

        return 0;
}

void ehca_destroy_comp_pool(void)
{
#ifdef CONFIG_INFINIBAND_EHCA_SCALING
        int i;

        unregister_cpu_notifier(&comp_pool_callback_nb);

        for (i = 0; i < NR_CPUS; i++) {
                if (cpu_online(i))
                        destroy_comp_task(pool, i);
        }
#endif

        return;
}