/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  Functions for EQs, NEQs and interrupts
 *
 *  Authors: Heiko J Schick <schickhj@de.ibm.com>
 *           Khadija Souissi <souissi@de.ibm.com>
 *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
 *           Joachim Fenkes <fenkes@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/slab.h>
#include <linux/smpboot.h>

#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
#include "ehca_tools.h"
#include "hcp_if.h"
#include "hipz_fns.h"
#include "ipz_pt_fn.h"

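/*
 * EQE/NEQE field masks. Note that EHCA_BMASK_IBM() takes IBM bit
 * numbering: bit 0 is the most significant bit of the 64-bit entry,
 * bit 63 the least significant.
 */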
#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM( 1,  1)
#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM( 8, 31)
#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM( 2,  7)
#define EQE_CQ_NUMBER          EHCA_BMASK_IBM( 8, 31)
#define EQE_QP_NUMBER          EHCA_BMASK_IBM( 8, 31)
#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)

#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM( 1,  1)
#define NEQE_EVENT_CODE        EHCA_BMASK_IBM( 2,  7)
#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)

#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
#define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)

static void queue_comp_task(struct ehca_cq *__cq);

static struct ehca_comp_pool *pool;

static inline void comp_event_callback(struct ehca_cq *cq)
{
        if (!cq->ib_cq.comp_handler)
                return;

        spin_lock(&cq->cb_lock);
        cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
        spin_unlock(&cq->cb_lock);
}

static void print_error_data(struct ehca_shca *shca, void *data,
                             u64 *rblock, int length)
{
        u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
        u64 resource = rblock[1];

        switch (type) {
        case 0x1: /* Queue Pair */
        {
                struct ehca_qp *qp = (struct ehca_qp *)data;

                /* only print error data if AER is set */
                if (rblock[6] == 0)
                        return;

                ehca_err(&shca->ib_device,
                         "QP 0x%x (resource=%llx) has errors.",
                         qp->ib_qp.qp_num, resource);
                break;
        }
        case 0x4: /* Completion Queue */
        {
                struct ehca_cq *cq = (struct ehca_cq *)data;

                ehca_err(&shca->ib_device,
                         "CQ 0x%x (resource=%llx) has errors.",
                         cq->cq_number, resource);
                break;
        }
        default:
                ehca_err(&shca->ib_device,
                         "Unknown error type: %llx on %s.",
                         type, shca->ib_device.name);
                break;
        }

        ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
        ehca_err(&shca->ib_device, "EHCA ----- error data begin "
                 "---------------------------------------------------");
        ehca_dmp(rblock, length, "resource=%llx", resource);
        ehca_err(&shca->ib_device, "EHCA ----- error data end "
                 "----------------------------------------------------");
}

int ehca_error_data(struct ehca_shca *shca, void *data,
                    u64 resource)
{
        unsigned long ret;
        u64 *rblock;
        unsigned long block_count;

        rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
        if (!rblock) {
                ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
                ret = -ENOMEM;
                goto error_data1;
        }

        /* rblock must be 4K aligned and should be 4K large */
        ret = hipz_h_error_data(shca->ipz_hca_handle,
                                resource,
                                rblock,
                                &block_count);

        if (ret == H_R_STATE)
                ehca_err(&shca->ib_device,
                         "No error data is available: %llx.", resource);
        else if (ret == H_SUCCESS) {
                int length;

                length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);

                if (length > EHCA_PAGESIZE)
                        length = EHCA_PAGESIZE;

                print_error_data(shca, data, rblock, length);
        } else
                ehca_err(&shca->ib_device,
                         "Error data could not be fetched: %llx", resource);

        ehca_free_fw_ctrlblock(rblock);

error_data1:
        return ret;
}

static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
                              enum ib_event_type event_type)
{
        struct ib_event event;

        /* PATH_MIG without the QP ever having been armed is false alarm */
        if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
                return;

        event.device = &shca->ib_device;
        event.event = event_type;

        if (qp->ext_type == EQPT_SRQ) {
                if (!qp->ib_srq.event_handler)
                        return;

                event.element.srq = &qp->ib_srq;
                qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
        } else {
                if (!qp->ib_qp.event_handler)
                        return;

                event.element.qp = &qp->ib_qp;
                qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
        }
}

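/*
 * Look up the QP by the token in the EQE and dispatch the event to its
 * consumer. The nr_events count is raised under ehca_qp_idr_lock so
 * that QP destruction can wait on wait_completion until all in-flight
 * events have been delivered.
 */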
static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
                              enum ib_event_type event_type, int fatal)
{
        struct ehca_qp *qp;
        u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);

        read_lock(&ehca_qp_idr_lock);
        qp = idr_find(&ehca_qp_idr, token);
        if (qp)
                atomic_inc(&qp->nr_events);
        read_unlock(&ehca_qp_idr_lock);

        if (!qp)
                return;

        if (fatal)
                ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);

        dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
                          IB_EVENT_SRQ_ERR : event_type);

        /*
         * eHCA only processes one WQE at a time for SRQ base QPs,
         * so the last WQE has been processed as soon as the QP enters
         * error state.
         */
        if (fatal && qp->ext_type == EQPT_SRQBASE)
                dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);

        if (atomic_dec_and_test(&qp->nr_events))
                wake_up(&qp->wait_completion);
}

static void cq_event_callback(struct ehca_shca *shca,
                              u64 eqe)
{
        struct ehca_cq *cq;
        u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);

        read_lock(&ehca_cq_idr_lock);
        cq = idr_find(&ehca_cq_idr, token);
        if (cq)
                atomic_inc(&cq->nr_events);
        read_unlock(&ehca_cq_idr_lock);

        if (!cq)
                return;

        ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);

        if (atomic_dec_and_test(&cq->nr_events))
                wake_up(&cq->wait_completion);
}

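/* Demultiplex an affiliated (EQ) event by its EE identifier. */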
static void parse_identifier(struct ehca_shca *shca, u64 eqe)
{
        u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);

        switch (identifier) {
        case 0x02: /* path migrated */
                qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
                break;
        case 0x03: /* communication established */
                qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
                break;
        case 0x04: /* send queue drained */
                qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
                break;
        case 0x05: /* QP error */
        case 0x06: /* QP error */
                qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
                break;
        case 0x07: /* CQ error */
        case 0x08: /* CQ error */
                cq_event_callback(shca, eqe);
                break;
        case 0x09: /* MRMWPTE error */
                ehca_err(&shca->ib_device, "MRMWPTE error.");
                break;
        case 0x0A: /* port event */
                ehca_err(&shca->ib_device, "Port event.");
                break;
        case 0x0B: /* MR access error */
                ehca_err(&shca->ib_device, "MR access error.");
                break;
        case 0x0C: /* EQ error */
                ehca_err(&shca->ib_device, "EQ error.");
                break;
        case 0x0D: /* P/Q_Key mismatch */
                ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
                break;
        case 0x10: /* sampling complete */
                ehca_err(&shca->ib_device, "Sampling complete.");
                break;
        case 0x11: /* unaffiliated access error */
                ehca_err(&shca->ib_device, "Unaffiliated access error.");
                break;
        case 0x12: /* path migrating */
                ehca_err(&shca->ib_device, "Path migrating.");
                break;
        case 0x13: /* interface trace stopped */
                ehca_err(&shca->ib_device, "Interface trace stopped.");
                break;
        case 0x14: /* first error capture info available */
                ehca_info(&shca->ib_device, "First error capture available");
                break;
        case 0x15: /* SRQ limit reached */
                qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
                break;
        default:
                ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
                         identifier, shca->ib_device.name);
                break;
        }
}

static void dispatch_port_event(struct ehca_shca *shca, int port_num,
                                enum ib_event_type type, const char *msg)
{
        struct ib_event event;

        ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
        event.device = &shca->ib_device;
        event.event = type;
        event.element.port_num = port_num;
        ib_dispatch_event(&event);
}

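/*
 * Compare the port's current SMA attributes against the saved copy and
 * dispatch SM/LID/P_Key change events for every difference found.
 */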
static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
{
        struct ehca_sma_attr new_attr;
        struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;

        ehca_query_sma_attr(shca, port_num, &new_attr);

        if (new_attr.sm_sl != old_attr->sm_sl ||
            new_attr.sm_lid != old_attr->sm_lid)
                dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
                                    "SM changed");

        if (new_attr.lid != old_attr->lid ||
            new_attr.lmc != old_attr->lmc)
                dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
                                    "LID changed");

        if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
            memcmp(new_attr.pkeys, old_attr->pkeys,
                   sizeof(u16) * new_attr.pkey_tbl_len))
                dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
                                    "P_Key changed");

        *old_attr = new_attr;
}

/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
static int replay_modify_qp(struct ehca_sport *sport)
{
        int aqp1_destroyed;
        unsigned long flags;

        spin_lock_irqsave(&sport->mod_sqp_lock, flags);

        aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];

        if (sport->ibqp_sqp[IB_QPT_SMI])
                ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
        if (!aqp1_destroyed)
                ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);

        spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);

        return aqp1_destroyed;
}

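/* Handle an unaffiliated (NEQ) event by its event code. */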
static void parse_ec(struct ehca_shca *shca, u64 eqe)
{
        u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
        u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
        u8 spec_event;
        struct ehca_sport *sport = &shca->sport[port - 1];

        switch (ec) {
        case 0x30: /* port availability change */
                if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
                        /* only replay modify_qp calls in autodetect mode;
                         * if AQP1 was destroyed, the port is already down
                         * again and we can drop the event.
                         */
                        if (ehca_nr_ports < 0)
                                if (replay_modify_qp(sport))
                                        break;

                        sport->port_state = IB_PORT_ACTIVE;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
                                            "is active");
                        ehca_query_sma_attr(shca, port, &sport->saved_attr);
                } else {
                        sport->port_state = IB_PORT_DOWN;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
                                            "is inactive");
                }
                break;
        case 0x31:
                /* port configuration change
                 * disruptive change is caused by
                 * LID, PKEY or SM change
                 */
                if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
                        ehca_warn(&shca->ib_device, "disruptive port "
                                  "%d configuration change", port);

                        sport->port_state = IB_PORT_DOWN;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
                                            "is inactive");

                        sport->port_state = IB_PORT_ACTIVE;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
                                            "is active");
                        ehca_query_sma_attr(shca, port,
                                            &sport->saved_attr);
                } else
                        notify_port_conf_change(shca, port);
                break;
        case 0x32: /* adapter malfunction */
                ehca_err(&shca->ib_device, "Adapter malfunction.");
                break;
        case 0x33: /* trace stopped */
                ehca_err(&shca->ib_device, "Trace stopped.");
                break;
        case 0x34: /* util async event */
                spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
                if (spec_event == 0x80) /* client reregister required */
                        dispatch_port_event(shca, port,
                                            IB_EVENT_CLIENT_REREGISTER,
                                            "client reregister req.");
                else
                        ehca_warn(&shca->ib_device, "Unknown util async "
                                  "event %x on port %x", spec_event, port);
                break;
        default:
                ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
                         ec, shca->ib_device.name);
                break;
        }
}

static inline void reset_eq_pending(struct ehca_cq *cq)
{
        u64 CQx_EP;
        struct h_galpa gal = cq->galpas.kernel;

        hipz_galpa_store_cq(gal, cqx_ep, 0x0);
        CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
}

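/*
 * The hard IRQ handlers below only schedule the tasklets; all actual
 * EQ/NEQ processing runs in tasklet context.
 */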
irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
{
        struct ehca_shca *shca = (struct ehca_shca *)dev_id;

        tasklet_hi_schedule(&shca->neq.interrupt_task);

        return IRQ_HANDLED;
}

void ehca_tasklet_neq(unsigned long data)
{
        struct ehca_shca *shca = (struct ehca_shca *)data;
        struct ehca_eqe *eqe;
        u64 ret;

        eqe = ehca_poll_eq(shca, &shca->neq);

        while (eqe) {
                if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
                        parse_ec(shca, eqe->entry);

                eqe = ehca_poll_eq(shca, &shca->neq);
        }

        ret = hipz_h_reset_event(shca->ipz_hca_handle,
                                 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);

        if (ret != H_SUCCESS)
                ehca_err(&shca->ib_device, "Can't clear notification events.");
}

irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
{
        struct ehca_shca *shca = (struct ehca_shca *)dev_id;

        tasklet_hi_schedule(&shca->eq.interrupt_task);

        return IRQ_HANDLED;
}

static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
{
        u64 eqe_value;
        u32 token;
        struct ehca_cq *cq;

        eqe_value = eqe->entry;
        ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
        if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
                ehca_dbg(&shca->ib_device, "Got completion event");
                token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
                read_lock(&ehca_cq_idr_lock);
                cq = idr_find(&ehca_cq_idr, token);
                if (cq)
                        atomic_inc(&cq->nr_events);
                read_unlock(&ehca_cq_idr_lock);
                if (cq == NULL) {
                        ehca_err(&shca->ib_device,
                                 "Invalid eqe for non-existing cq token=%x",
                                 token);
                        return;
                }
                reset_eq_pending(cq);
                if (ehca_scaling_code)
                        queue_comp_task(cq);
                else {
                        comp_event_callback(cq);
                        if (atomic_dec_and_test(&cq->nr_events))
                                wake_up(&cq->wait_completion);
                }
        } else {
                ehca_dbg(&shca->ib_device, "Got non completion event");
                parse_identifier(shca, eqe_value);
        }
}

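/*
 * Drain the event queue: EQEs are first collected into eq->eqe_cache
 * under eq->irq_spinlock, with the target CQs looked up and their
 * nr_events counts raised; only then are the pending bits reset and
 * the completion handlers run. When called with is_irq == 0 (deadman
 * poll), an explicit EOI is issued for the interrupt source.
 */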
void ehca_process_eq(struct ehca_shca *shca, int is_irq)
{
        struct ehca_eq *eq = &shca->eq;
        struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
        u64 eqe_value, ret;
        int eqe_cnt, i;
        int eq_empty = 0;

        spin_lock(&eq->irq_spinlock);
        if (is_irq) {
                const int max_query_cnt = 100;
                int query_cnt = 0;
                int int_state = 1;
                do {
                        int_state = hipz_h_query_int_state(
                                shca->ipz_hca_handle, eq->ist);
                        query_cnt++;
                        iosync();
                } while (int_state && query_cnt < max_query_cnt);
                if (unlikely((query_cnt == max_query_cnt)))
                        ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
                                 int_state, query_cnt);
        }

        /* read out all eqes */
        eqe_cnt = 0;
        do {
                u32 token;
                eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
                if (!eqe_cache[eqe_cnt].eqe)
                        break;
                eqe_value = eqe_cache[eqe_cnt].eqe->entry;
                if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
                        token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
                        read_lock(&ehca_cq_idr_lock);
                        eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
                        if (eqe_cache[eqe_cnt].cq)
                                atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
                        read_unlock(&ehca_cq_idr_lock);
                        if (!eqe_cache[eqe_cnt].cq) {
                                ehca_err(&shca->ib_device,
                                         "Invalid eqe for non-existing cq "
                                         "token=%x", token);
                                continue;
                        }
                } else
                        eqe_cache[eqe_cnt].cq = NULL;
                eqe_cnt++;
        } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
        if (!eqe_cnt) {
                if (is_irq)
                        ehca_dbg(&shca->ib_device,
                                 "No eqe found for irq event");
                goto unlock_irq_spinlock;
        } else if (!is_irq) {
                ret = hipz_h_eoi(eq->ist);
                if (ret != H_SUCCESS)
                        ehca_err(&shca->ib_device,
                                 "bad return code EOI -rc = %lld\n", ret);
                ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
        }
        if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
                ehca_dbg(&shca->ib_device, "too many eqes for one irq event");

        /* enable irq for new packets */
        for (i = 0; i < eqe_cnt; i++) {
                if (eq->eqe_cache[i].cq)
                        reset_eq_pending(eq->eqe_cache[i].cq);
        }

        /* check eq */
        spin_lock(&eq->spinlock);
        eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
        spin_unlock(&eq->spinlock);

        /* call completion handler for cached eqes */
        for (i = 0; i < eqe_cnt; i++)
                if (eq->eqe_cache[i].cq) {
                        if (ehca_scaling_code)
                                queue_comp_task(eq->eqe_cache[i].cq);
                        else {
                                struct ehca_cq *cq = eq->eqe_cache[i].cq;
                                comp_event_callback(cq);
                                if (atomic_dec_and_test(&cq->nr_events))
                                        wake_up(&cq->wait_completion);
                        }
                } else {
                        ehca_dbg(&shca->ib_device, "Got non completion event");
                        parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
                }

        /* poll eq if not empty */
        if (eq_empty)
                goto unlock_irq_spinlock;
        do {
                struct ehca_eqe *eqe;
                eqe = ehca_poll_eq(shca, &shca->eq);
                if (!eqe)
                        break;
                process_eqe(shca, eqe);
        } while (1);

unlock_irq_spinlock:
        spin_unlock(&eq->irq_spinlock);
}

void ehca_tasklet_eq(unsigned long data)
{
        ehca_process_eq((struct ehca_shca *)data, 1);
}

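/*
 * Pick the next online CPU with an active completion task, round-robin
 * starting after pool->last_cpu.
 */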
static int find_next_online_cpu(struct ehca_comp_pool *pool)
{
        int cpu;
        unsigned long flags;

        WARN_ON_ONCE(!in_interrupt());
        if (ehca_debug_level >= 3)
                ehca_dmp(cpu_online_mask, cpumask_size(), "");

        spin_lock_irqsave(&pool->last_cpu_lock, flags);
        do {
                cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
                if (cpu >= nr_cpu_ids)
                        cpu = cpumask_first(cpu_online_mask);
                pool->last_cpu = cpu;
        } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
        spin_unlock_irqrestore(&pool->last_cpu_lock, flags);

        return cpu;
}

static void __queue_comp_task(struct ehca_cq *__cq,
                              struct ehca_cpu_comp_task *cct,
                              struct task_struct *thread)
{
        unsigned long flags;

        spin_lock_irqsave(&cct->task_lock, flags);
        spin_lock(&__cq->task_lock);

        if (__cq->nr_callbacks == 0) {
                __cq->nr_callbacks++;
                list_add_tail(&__cq->entry, &cct->cq_list);
                cct->cq_jobs++;
                wake_up_process(thread);
        } else
                __cq->nr_callbacks++;

        spin_unlock(&__cq->task_lock);
        spin_unlock_irqrestore(&cct->task_lock, flags);
}

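/*
 * Queue a CQ to a per-CPU completion task. If the chosen CPU already
 * has jobs pending, advance to the next online CPU once to spread the
 * load.
 */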
static void queue_comp_task(struct ehca_cq *__cq)
{
        int cpu_id;
        struct ehca_cpu_comp_task *cct;
        struct task_struct *thread;
        int cq_jobs;
        unsigned long flags;

        cpu_id = find_next_online_cpu(pool);
        BUG_ON(!cpu_online(cpu_id));

        cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
        thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
        BUG_ON(!cct || !thread);

        spin_lock_irqsave(&cct->task_lock, flags);
        cq_jobs = cct->cq_jobs;
        spin_unlock_irqrestore(&cct->task_lock, flags);
        if (cq_jobs > 0) {
                cpu_id = find_next_online_cpu(pool);
                cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
                thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
                BUG_ON(!cct || !thread);
        }
        __queue_comp_task(__cq, cct, thread);
}

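/*
 * Work off all CQs queued to this completion task. Called with
 * cct->task_lock held; the lock is dropped around the completion
 * callback itself.
 */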
static void run_comp_task(struct ehca_cpu_comp_task *cct)
{
        struct ehca_cq *cq;

        while (!list_empty(&cct->cq_list)) {
                cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
                spin_unlock_irq(&cct->task_lock);

                comp_event_callback(cq);
                if (atomic_dec_and_test(&cq->nr_events))
                        wake_up(&cq->wait_completion);

                spin_lock_irq(&cct->task_lock);
                spin_lock(&cq->task_lock);
                cq->nr_callbacks--;
                if (!cq->nr_callbacks) {
                        list_del_init(cct->cq_list.next);
                        cct->cq_jobs--;
                }
                spin_unlock(&cq->task_lock);
        }
}

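/*
 * smpboot park callback: deactivate this CPU's completion task and
 * migrate its pending CQs to the next online CPU.
 */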
static void comp_task_park(unsigned int cpu)
{
        struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
        struct ehca_cpu_comp_task *target;
        struct task_struct *thread;
        struct ehca_cq *cq, *tmp;
        LIST_HEAD(list);

        spin_lock_irq(&cct->task_lock);
        cct->cq_jobs = 0;
        cct->active = 0;
        list_splice_init(&cct->cq_list, &list);
        spin_unlock_irq(&cct->task_lock);

        cpu = find_next_online_cpu(pool);
        target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
        thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
        spin_lock_irq(&target->task_lock);
        list_for_each_entry_safe(cq, tmp, &list, entry) {
                list_del(&cq->entry);
                __queue_comp_task(cq, target, thread);
        }
        spin_unlock_irq(&target->task_lock);
}

static void comp_task_stop(unsigned int cpu, bool online)
{
        struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);

        spin_lock_irq(&cct->task_lock);
        cct->cq_jobs = 0;
        cct->active = 0;
        WARN_ON(!list_empty(&cct->cq_list));
        spin_unlock_irq(&cct->task_lock);
}

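/* smpboot thread callbacks: the thread runs while CQ jobs are queued. */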
static int comp_task_should_run(unsigned int cpu)
{
        struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);

        return cct->cq_jobs;
}

static void comp_task(unsigned int cpu)
{
        struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
        int cql_empty;

        spin_lock_irq(&cct->task_lock);
        cql_empty = list_empty(&cct->cq_list);
        if (!cql_empty) {
                __set_current_state(TASK_RUNNING);
                run_comp_task(cct);
        }
        spin_unlock_irq(&cct->task_lock);
}

static struct smp_hotplug_thread comp_pool_threads = {
        .thread_should_run = comp_task_should_run,
        .thread_fn         = comp_task,
        .thread_comm       = "ehca_comp/%u",
        .cleanup           = comp_task_stop,
        .park              = comp_task_park,
};

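/*
 * Allocate the per-CPU completion task pool and register the
 * "ehca_comp/%u" smpboot threads. A no-op unless ehca_scaling_code is
 * set.
 */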
int ehca_create_comp_pool(void)
{
        int cpu, ret = -ENOMEM;

        if (!ehca_scaling_code)
                return 0;

        pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
        if (pool == NULL)
                return -ENOMEM;

        spin_lock_init(&pool->last_cpu_lock);
        pool->last_cpu = cpumask_any(cpu_online_mask);

        pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
        if (!pool->cpu_comp_tasks)
                goto out_pool;

        pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
        if (!pool->cpu_comp_threads)
                goto out_tasks;

        for_each_present_cpu(cpu) {
                struct ehca_cpu_comp_task *cct;

                cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
                spin_lock_init(&cct->task_lock);
                INIT_LIST_HEAD(&cct->cq_list);
        }

        comp_pool_threads.store = pool->cpu_comp_threads;
        ret = smpboot_register_percpu_thread(&comp_pool_threads);
        if (ret)
                goto out_threads;

        pr_info("eHCA scaling code enabled\n");
        return ret;

out_threads:
        free_percpu(pool->cpu_comp_threads);
out_tasks:
        free_percpu(pool->cpu_comp_tasks);
out_pool:
        kfree(pool);

        return ret;
}

void ehca_destroy_comp_pool(void)
{
        if (!ehca_scaling_code)
                return;

        smpboot_unregister_percpu_thread(&comp_pool_threads);

        free_percpu(pool->cpu_comp_threads);
        free_percpu(pool->cpu_comp_tasks);
        kfree(pool);
}