fs: use kmem_cache_zalloc instead
[pv_ops_mirror.git] / drivers / infiniband / hw / ehca / ehca_irq.c
blob3f617b27b9543658d901dd1de36caca52371f9e5
1 /*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
4 * Functions for EQs, NEQs and interrupts
6 * Authors: Heiko J Schick <schickhj@de.ibm.com>
7 * Khadija Souissi <souissi@de.ibm.com>
8 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
9 * Joachim Fenkes <fenkes@de.ibm.com>
11 * Copyright (c) 2005 IBM Corporation
13 * All rights reserved.
15 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
16 * BSD.
18 * OpenIB BSD License
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions are met:
23 * Redistributions of source code must retain the above copyright notice, this
24 * list of conditions and the following disclaimer.
26 * Redistributions in binary form must reproduce the above copyright notice,
27 * this list of conditions and the following disclaimer in the documentation
28 * and/or other materials
29 * provided with the distribution.
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
38 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
39 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGE.
#include <linux/err.h>

#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
#include "ehca_tools.h"
#include "hcp_if.h"
#include "hipz_fns.h"
#include "ipz_pt_fn.h"
/* Bit fields extracted from an event queue entry (EQE) */
#define EQE_COMPLETION_EVENT	EHCA_BMASK_IBM(1, 1)
#define EQE_CQ_QP_NUMBER	EHCA_BMASK_IBM(8, 31)
#define EQE_EE_IDENTIFIER	EHCA_BMASK_IBM(2, 7)
#define EQE_CQ_NUMBER		EHCA_BMASK_IBM(8, 31)
#define EQE_QP_NUMBER		EHCA_BMASK_IBM(8, 31)
#define EQE_QP_TOKEN		EHCA_BMASK_IBM(32, 63)
#define EQE_CQ_TOKEN		EHCA_BMASK_IBM(32, 63)

/* Bit fields extracted from a notification event queue entry (NEQE) */
#define NEQE_COMPLETION_EVENT	EHCA_BMASK_IBM(1, 1)
#define NEQE_EVENT_CODE		EHCA_BMASK_IBM(2, 7)
#define NEQE_PORT_NUMBER	EHCA_BMASK_IBM(8, 15)
#define NEQE_PORT_AVAILABILITY	EHCA_BMASK_IBM(16, 16)
#define NEQE_DISRUPTIVE		EHCA_BMASK_IBM(16, 16)

/*
 * Bit fields of the error data block: the length is taken from rblock[0]
 * and the resource type from rblock[2].
 */
#define ERROR_DATA_LENGTH	EHCA_BMASK_IBM(52, 63)
#define ERROR_DATA_TYPE		EHCA_BMASK_IBM(0, 7)

/* Defined further down; the EQ processing code above it needs it. */
static void queue_comp_task(struct ehca_cq *__cq);

/* Completion task pool; allocated by ehca_create_comp_pool(). */
static struct ehca_comp_pool *pool;
73 static inline void comp_event_callback(struct ehca_cq *cq)
75 if (!cq->ib_cq.comp_handler)
76 return;
78 spin_lock(&cq->cb_lock);
79 cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
80 spin_unlock(&cq->cb_lock);
82 return;
85 static void print_error_data(struct ehca_shca *shca, void *data,
86 u64 *rblock, int length)
88 u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
89 u64 resource = rblock[1];
91 switch (type) {
92 case 0x1: /* Queue Pair */
94 struct ehca_qp *qp = (struct ehca_qp *)data;
96 /* only print error data if AER is set */
97 if (rblock[6] == 0)
98 return;
100 ehca_err(&shca->ib_device,
101 "QP 0x%x (resource=%lx) has errors.",
102 qp->ib_qp.qp_num, resource);
103 break;
105 case 0x4: /* Completion Queue */
107 struct ehca_cq *cq = (struct ehca_cq *)data;
109 ehca_err(&shca->ib_device,
110 "CQ 0x%x (resource=%lx) has errors.",
111 cq->cq_number, resource);
112 break;
114 default:
115 ehca_err(&shca->ib_device,
116 "Unknown error type: %lx on %s.",
117 type, shca->ib_device.name);
118 break;
121 ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
122 ehca_err(&shca->ib_device, "EHCA ----- error data begin "
123 "---------------------------------------------------");
124 ehca_dmp(rblock, length, "resource=%lx", resource);
125 ehca_err(&shca->ib_device, "EHCA ----- error data end "
126 "----------------------------------------------------");
128 return;
131 int ehca_error_data(struct ehca_shca *shca, void *data,
132 u64 resource)
135 unsigned long ret;
136 u64 *rblock;
137 unsigned long block_count;
139 rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
140 if (!rblock) {
141 ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
142 ret = -ENOMEM;
143 goto error_data1;
146 /* rblock must be 4K aligned and should be 4K large */
147 ret = hipz_h_error_data(shca->ipz_hca_handle,
148 resource,
149 rblock,
150 &block_count);
152 if (ret == H_R_STATE)
153 ehca_err(&shca->ib_device,
154 "No error data is available: %lx.", resource);
155 else if (ret == H_SUCCESS) {
156 int length;
158 length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
160 if (length > EHCA_PAGESIZE)
161 length = EHCA_PAGESIZE;
163 print_error_data(shca, data, rblock, length);
164 } else
165 ehca_err(&shca->ib_device,
166 "Error data could not be fetched: %lx", resource);
168 ehca_free_fw_ctrlblock(rblock);
170 error_data1:
171 return ret;
175 static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
176 enum ib_event_type event_type)
178 struct ib_event event;
180 event.device = &shca->ib_device;
181 event.event = event_type;
183 if (qp->ext_type == EQPT_SRQ) {
184 if (!qp->ib_srq.event_handler)
185 return;
187 event.element.srq = &qp->ib_srq;
188 qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
189 } else {
190 if (!qp->ib_qp.event_handler)
191 return;
193 event.element.qp = &qp->ib_qp;
194 qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
198 static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
199 enum ib_event_type event_type, int fatal)
201 struct ehca_qp *qp;
202 u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
204 read_lock(&ehca_qp_idr_lock);
205 qp = idr_find(&ehca_qp_idr, token);
206 read_unlock(&ehca_qp_idr_lock);
208 if (!qp)
209 return;
211 if (fatal)
212 ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
214 dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
215 IB_EVENT_SRQ_ERR : event_type);
218 * eHCA only processes one WQE at a time for SRQ base QPs,
219 * so the last WQE has been processed as soon as the QP enters
220 * error state.
222 if (fatal && qp->ext_type == EQPT_SRQBASE)
223 dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
225 return;
228 static void cq_event_callback(struct ehca_shca *shca,
229 u64 eqe)
231 struct ehca_cq *cq;
232 u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
234 read_lock(&ehca_cq_idr_lock);
235 cq = idr_find(&ehca_cq_idr, token);
236 if (cq)
237 atomic_inc(&cq->nr_events);
238 read_unlock(&ehca_cq_idr_lock);
240 if (!cq)
241 return;
243 ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
245 if (atomic_dec_and_test(&cq->nr_events))
246 wake_up(&cq->wait_completion);
248 return;
251 static void parse_identifier(struct ehca_shca *shca, u64 eqe)
253 u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
255 switch (identifier) {
256 case 0x02: /* path migrated */
257 qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
258 break;
259 case 0x03: /* communication established */
260 qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
261 break;
262 case 0x04: /* send queue drained */
263 qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
264 break;
265 case 0x05: /* QP error */
266 case 0x06: /* QP error */
267 qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
268 break;
269 case 0x07: /* CQ error */
270 case 0x08: /* CQ error */
271 cq_event_callback(shca, eqe);
272 break;
273 case 0x09: /* MRMWPTE error */
274 ehca_err(&shca->ib_device, "MRMWPTE error.");
275 break;
276 case 0x0A: /* port event */
277 ehca_err(&shca->ib_device, "Port event.");
278 break;
279 case 0x0B: /* MR access error */
280 ehca_err(&shca->ib_device, "MR access error.");
281 break;
282 case 0x0C: /* EQ error */
283 ehca_err(&shca->ib_device, "EQ error.");
284 break;
285 case 0x0D: /* P/Q_Key mismatch */
286 ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
287 break;
288 case 0x10: /* sampling complete */
289 ehca_err(&shca->ib_device, "Sampling complete.");
290 break;
291 case 0x11: /* unaffiliated access error */
292 ehca_err(&shca->ib_device, "Unaffiliated access error.");
293 break;
294 case 0x12: /* path migrating */
295 ehca_err(&shca->ib_device, "Path migrating.");
296 break;
297 case 0x13: /* interface trace stopped */
298 ehca_err(&shca->ib_device, "Interface trace stopped.");
299 break;
300 case 0x14: /* first error capture info available */
301 ehca_info(&shca->ib_device, "First error capture available");
302 break;
303 case 0x15: /* SRQ limit reached */
304 qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
305 break;
306 default:
307 ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
308 identifier, shca->ib_device.name);
309 break;
312 return;
315 static void dispatch_port_event(struct ehca_shca *shca, int port_num,
316 enum ib_event_type type, const char *msg)
318 struct ib_event event;
320 ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
321 event.device = &shca->ib_device;
322 event.event = type;
323 event.element.port_num = port_num;
324 ib_dispatch_event(&event);
327 static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
329 struct ehca_sma_attr new_attr;
330 struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
332 ehca_query_sma_attr(shca, port_num, &new_attr);
334 if (new_attr.sm_sl != old_attr->sm_sl ||
335 new_attr.sm_lid != old_attr->sm_lid)
336 dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
337 "SM changed");
339 if (new_attr.lid != old_attr->lid ||
340 new_attr.lmc != old_attr->lmc)
341 dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
342 "LID changed");
344 if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
345 memcmp(new_attr.pkeys, old_attr->pkeys,
346 sizeof(u16) * new_attr.pkey_tbl_len))
347 dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
348 "P_Key changed");
350 *old_attr = new_attr;
353 static void parse_ec(struct ehca_shca *shca, u64 eqe)
355 u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
356 u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
358 switch (ec) {
359 case 0x30: /* port availability change */
360 if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
361 shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
362 dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
363 "is active");
364 ehca_query_sma_attr(shca, port,
365 &shca->sport[port - 1].saved_attr);
366 } else {
367 shca->sport[port - 1].port_state = IB_PORT_DOWN;
368 dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
369 "is inactive");
371 break;
372 case 0x31:
373 /* port configuration change
374 * disruptive change is caused by
375 * LID, PKEY or SM change
377 if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
378 ehca_warn(&shca->ib_device, "disruptive port "
379 "%d configuration change", port);
381 shca->sport[port - 1].port_state = IB_PORT_DOWN;
382 dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
383 "is inactive");
385 shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
386 dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
387 "is active");
388 } else
389 notify_port_conf_change(shca, port);
390 break;
391 case 0x32: /* adapter malfunction */
392 ehca_err(&shca->ib_device, "Adapter malfunction.");
393 break;
394 case 0x33: /* trace stopped */
395 ehca_err(&shca->ib_device, "Traced stopped.");
396 break;
397 default:
398 ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
399 ec, shca->ib_device.name);
400 break;
403 return;
406 static inline void reset_eq_pending(struct ehca_cq *cq)
408 u64 CQx_EP;
409 struct h_galpa gal = cq->galpas.kernel;
411 hipz_galpa_store_cq(gal, cqx_ep, 0x0);
412 CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
414 return;
417 irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
419 struct ehca_shca *shca = (struct ehca_shca*)dev_id;
421 tasklet_hi_schedule(&shca->neq.interrupt_task);
423 return IRQ_HANDLED;
426 void ehca_tasklet_neq(unsigned long data)
428 struct ehca_shca *shca = (struct ehca_shca*)data;
429 struct ehca_eqe *eqe;
430 u64 ret;
432 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
434 while (eqe) {
435 if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
436 parse_ec(shca, eqe->entry);
438 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
441 ret = hipz_h_reset_event(shca->ipz_hca_handle,
442 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
444 if (ret != H_SUCCESS)
445 ehca_err(&shca->ib_device, "Can't clear notification events.");
447 return;
450 irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
452 struct ehca_shca *shca = (struct ehca_shca*)dev_id;
454 tasklet_hi_schedule(&shca->eq.interrupt_task);
456 return IRQ_HANDLED;
460 static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
462 u64 eqe_value;
463 u32 token;
464 struct ehca_cq *cq;
466 eqe_value = eqe->entry;
467 ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
468 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
469 ehca_dbg(&shca->ib_device, "Got completion event");
470 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
471 read_lock(&ehca_cq_idr_lock);
472 cq = idr_find(&ehca_cq_idr, token);
473 if (cq)
474 atomic_inc(&cq->nr_events);
475 read_unlock(&ehca_cq_idr_lock);
476 if (cq == NULL) {
477 ehca_err(&shca->ib_device,
478 "Invalid eqe for non-existing cq token=%x",
479 token);
480 return;
482 reset_eq_pending(cq);
483 if (ehca_scaling_code)
484 queue_comp_task(cq);
485 else {
486 comp_event_callback(cq);
487 if (atomic_dec_and_test(&cq->nr_events))
488 wake_up(&cq->wait_completion);
490 } else {
491 ehca_dbg(&shca->ib_device, "Got non completion event");
492 parse_identifier(shca, eqe_value);
496 void ehca_process_eq(struct ehca_shca *shca, int is_irq)
498 struct ehca_eq *eq = &shca->eq;
499 struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
500 u64 eqe_value;
501 unsigned long flags;
502 int eqe_cnt, i;
503 int eq_empty = 0;
505 spin_lock_irqsave(&eq->irq_spinlock, flags);
506 if (is_irq) {
507 const int max_query_cnt = 100;
508 int query_cnt = 0;
509 int int_state = 1;
510 do {
511 int_state = hipz_h_query_int_state(
512 shca->ipz_hca_handle, eq->ist);
513 query_cnt++;
514 iosync();
515 } while (int_state && query_cnt < max_query_cnt);
516 if (unlikely((query_cnt == max_query_cnt)))
517 ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
518 int_state, query_cnt);
521 /* read out all eqes */
522 eqe_cnt = 0;
523 do {
524 u32 token;
525 eqe_cache[eqe_cnt].eqe =
526 (struct ehca_eqe *)ehca_poll_eq(shca, eq);
527 if (!eqe_cache[eqe_cnt].eqe)
528 break;
529 eqe_value = eqe_cache[eqe_cnt].eqe->entry;
530 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
531 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
532 read_lock(&ehca_cq_idr_lock);
533 eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
534 if (eqe_cache[eqe_cnt].cq)
535 atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
536 read_unlock(&ehca_cq_idr_lock);
537 if (!eqe_cache[eqe_cnt].cq) {
538 ehca_err(&shca->ib_device,
539 "Invalid eqe for non-existing cq "
540 "token=%x", token);
541 continue;
543 } else
544 eqe_cache[eqe_cnt].cq = NULL;
545 eqe_cnt++;
546 } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
547 if (!eqe_cnt) {
548 if (is_irq)
549 ehca_dbg(&shca->ib_device,
550 "No eqe found for irq event");
551 goto unlock_irq_spinlock;
552 } else if (!is_irq)
553 ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
554 if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
555 ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
556 /* enable irq for new packets */
557 for (i = 0; i < eqe_cnt; i++) {
558 if (eq->eqe_cache[i].cq)
559 reset_eq_pending(eq->eqe_cache[i].cq);
561 /* check eq */
562 spin_lock(&eq->spinlock);
563 eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
564 spin_unlock(&eq->spinlock);
565 /* call completion handler for cached eqes */
566 for (i = 0; i < eqe_cnt; i++)
567 if (eq->eqe_cache[i].cq) {
568 if (ehca_scaling_code)
569 queue_comp_task(eq->eqe_cache[i].cq);
570 else {
571 struct ehca_cq *cq = eq->eqe_cache[i].cq;
572 comp_event_callback(cq);
573 if (atomic_dec_and_test(&cq->nr_events))
574 wake_up(&cq->wait_completion);
576 } else {
577 ehca_dbg(&shca->ib_device, "Got non completion event");
578 parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
580 /* poll eq if not empty */
581 if (eq_empty)
582 goto unlock_irq_spinlock;
583 do {
584 struct ehca_eqe *eqe;
585 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
586 if (!eqe)
587 break;
588 process_eqe(shca, eqe);
589 } while (1);
591 unlock_irq_spinlock:
592 spin_unlock_irqrestore(&eq->irq_spinlock, flags);
/*
 * Tasklet for the event queue: processes the EQ in interrupt mode.
 * @data is the adapter (struct ehca_shca) cast to unsigned long.
 */
void ehca_tasklet_eq(unsigned long data)
{
	struct ehca_shca *shca = (struct ehca_shca *)data;

	ehca_process_eq(shca, 1);
}
600 static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
602 int cpu;
603 unsigned long flags;
605 WARN_ON_ONCE(!in_interrupt());
606 if (ehca_debug_level)
607 ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
609 spin_lock_irqsave(&pool->last_cpu_lock, flags);
610 cpu = next_cpu(pool->last_cpu, cpu_online_map);
611 if (cpu == NR_CPUS)
612 cpu = first_cpu(cpu_online_map);
613 pool->last_cpu = cpu;
614 spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
616 return cpu;
619 static void __queue_comp_task(struct ehca_cq *__cq,
620 struct ehca_cpu_comp_task *cct)
622 unsigned long flags;
624 spin_lock_irqsave(&cct->task_lock, flags);
625 spin_lock(&__cq->task_lock);
627 if (__cq->nr_callbacks == 0) {
628 __cq->nr_callbacks++;
629 list_add_tail(&__cq->entry, &cct->cq_list);
630 cct->cq_jobs++;
631 wake_up(&cct->wait_queue);
632 } else
633 __cq->nr_callbacks++;
635 spin_unlock(&__cq->task_lock);
636 spin_unlock_irqrestore(&cct->task_lock, flags);
639 static void queue_comp_task(struct ehca_cq *__cq)
641 int cpu_id;
642 struct ehca_cpu_comp_task *cct;
643 int cq_jobs;
644 unsigned long flags;
646 cpu_id = find_next_online_cpu(pool);
647 BUG_ON(!cpu_online(cpu_id));
649 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
650 BUG_ON(!cct);
652 spin_lock_irqsave(&cct->task_lock, flags);
653 cq_jobs = cct->cq_jobs;
654 spin_unlock_irqrestore(&cct->task_lock, flags);
655 if (cq_jobs > 0) {
656 cpu_id = find_next_online_cpu(pool);
657 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
658 BUG_ON(!cct);
661 __queue_comp_task(__cq, cct);
664 static void run_comp_task(struct ehca_cpu_comp_task *cct)
666 struct ehca_cq *cq;
667 unsigned long flags;
669 spin_lock_irqsave(&cct->task_lock, flags);
671 while (!list_empty(&cct->cq_list)) {
672 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
673 spin_unlock_irqrestore(&cct->task_lock, flags);
675 comp_event_callback(cq);
676 if (atomic_dec_and_test(&cq->nr_events))
677 wake_up(&cq->wait_completion);
679 spin_lock_irqsave(&cct->task_lock, flags);
680 spin_lock(&cq->task_lock);
681 cq->nr_callbacks--;
682 if (!cq->nr_callbacks) {
683 list_del_init(cct->cq_list.next);
684 cct->cq_jobs--;
686 spin_unlock(&cq->task_lock);
689 spin_unlock_irqrestore(&cct->task_lock, flags);
692 static int comp_task(void *__cct)
694 struct ehca_cpu_comp_task *cct = __cct;
695 int cql_empty;
696 DECLARE_WAITQUEUE(wait, current);
698 set_current_state(TASK_INTERRUPTIBLE);
699 while (!kthread_should_stop()) {
700 add_wait_queue(&cct->wait_queue, &wait);
702 spin_lock_irq(&cct->task_lock);
703 cql_empty = list_empty(&cct->cq_list);
704 spin_unlock_irq(&cct->task_lock);
705 if (cql_empty)
706 schedule();
707 else
708 __set_current_state(TASK_RUNNING);
710 remove_wait_queue(&cct->wait_queue, &wait);
712 spin_lock_irq(&cct->task_lock);
713 cql_empty = list_empty(&cct->cq_list);
714 spin_unlock_irq(&cct->task_lock);
715 if (!cql_empty)
716 run_comp_task(__cct);
718 set_current_state(TASK_INTERRUPTIBLE);
720 __set_current_state(TASK_RUNNING);
722 return 0;
725 static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
726 int cpu)
728 struct ehca_cpu_comp_task *cct;
730 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
731 spin_lock_init(&cct->task_lock);
732 INIT_LIST_HEAD(&cct->cq_list);
733 init_waitqueue_head(&cct->wait_queue);
734 cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
736 return cct->task;
739 static void destroy_comp_task(struct ehca_comp_pool *pool,
740 int cpu)
742 struct ehca_cpu_comp_task *cct;
743 struct task_struct *task;
744 unsigned long flags_cct;
746 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
748 spin_lock_irqsave(&cct->task_lock, flags_cct);
750 task = cct->task;
751 cct->task = NULL;
752 cct->cq_jobs = 0;
754 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
756 if (task)
757 kthread_stop(task);
760 static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu)
762 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
763 LIST_HEAD(list);
764 struct ehca_cq *cq;
765 unsigned long flags_cct;
767 spin_lock_irqsave(&cct->task_lock, flags_cct);
769 list_splice_init(&cct->cq_list, &list);
771 while (!list_empty(&list)) {
772 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
774 list_del(&cq->entry);
775 __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
776 smp_processor_id()));
779 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
783 static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
784 unsigned long action,
785 void *hcpu)
787 unsigned int cpu = (unsigned long)hcpu;
788 struct ehca_cpu_comp_task *cct;
790 switch (action) {
791 case CPU_UP_PREPARE:
792 case CPU_UP_PREPARE_FROZEN:
793 ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
794 if (!create_comp_task(pool, cpu)) {
795 ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
796 return NOTIFY_BAD;
798 break;
799 case CPU_UP_CANCELED:
800 case CPU_UP_CANCELED_FROZEN:
801 ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
802 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
803 kthread_bind(cct->task, any_online_cpu(cpu_online_map));
804 destroy_comp_task(pool, cpu);
805 break;
806 case CPU_ONLINE:
807 case CPU_ONLINE_FROZEN:
808 ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
809 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
810 kthread_bind(cct->task, cpu);
811 wake_up_process(cct->task);
812 break;
813 case CPU_DOWN_PREPARE:
814 case CPU_DOWN_PREPARE_FROZEN:
815 ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
816 break;
817 case CPU_DOWN_FAILED:
818 case CPU_DOWN_FAILED_FROZEN:
819 ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
820 break;
821 case CPU_DEAD:
822 case CPU_DEAD_FROZEN:
823 ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
824 destroy_comp_task(pool, cpu);
825 take_over_work(pool, cpu);
826 break;
829 return NOTIFY_OK;
832 static struct notifier_block comp_pool_callback_nb __cpuinitdata = {
833 .notifier_call = comp_pool_callback,
834 .priority = 0,
837 int ehca_create_comp_pool(void)
839 int cpu;
840 struct task_struct *task;
842 if (!ehca_scaling_code)
843 return 0;
845 pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
846 if (pool == NULL)
847 return -ENOMEM;
849 spin_lock_init(&pool->last_cpu_lock);
850 pool->last_cpu = any_online_cpu(cpu_online_map);
852 pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
853 if (pool->cpu_comp_tasks == NULL) {
854 kfree(pool);
855 return -EINVAL;
858 for_each_online_cpu(cpu) {
859 task = create_comp_task(pool, cpu);
860 if (task) {
861 kthread_bind(task, cpu);
862 wake_up_process(task);
866 register_hotcpu_notifier(&comp_pool_callback_nb);
868 printk(KERN_INFO "eHCA scaling code enabled\n");
870 return 0;
873 void ehca_destroy_comp_pool(void)
875 int i;
877 if (!ehca_scaling_code)
878 return;
880 unregister_hotcpu_notifier(&comp_pool_callback_nb);
882 for (i = 0; i < NR_CPUS; i++) {
883 if (cpu_online(i))
884 destroy_comp_task(pool, i);
886 free_percpu(pool->cpu_comp_tasks);
887 kfree(pool);