/*
 * SN Platform GRU Driver
 *
 *              KERNEL SERVICES THAT USE THE GRU
 *
 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>

/*
 * Kernel GRU Usage
 *
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * At boot time, the kernel permanently reserves a fixed number of
 * CBRs/DSRs for each cpu to use. The resources are all taken from
 * the GRU chiplet 1 on the blade. This leaves the full set of resources
 * of chiplet 0 available to be allocated to a single user.
 */

/* Blade percpu resources PERMANENTLY reserved for kernel use */
#define GRU_NUM_KERNEL_CBR      1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define GRU_NUM_KERNEL_DSR_CL   (GRU_NUM_KERNEL_DSR_BYTES /             \
                                        GRU_CACHE_LINE_BYTES)
#define KERNEL_CTXNUM           15

/* GRU instruction attributes for all instructions */
#define IMA                     IMA_CB_DELAY
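
/*
 * Example of the resulting reservation (illustrative numbers only): a
 * blade with 16 possible cpus permanently ties up 16 * GRU_NUM_KERNEL_CBR
 * = 16 CBRs and 16 * GRU_NUM_KERNEL_DSR_BYTES = 4096 DSR bytes (64 cache
 * lines), all on chiplet 1. See gru_kservices_init() below.
 */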

/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
#define __gru_cacheline_aligned__                               \
        __attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))

#define MAGIC   0x1234567887654321UL

/* Default retry count for GRU errors on kernel instructions */
#define EXCEPTION_RETRY_LIMIT   3

/* Status of message queue sections */
#define MQS_EMPTY               0
#define MQS_FULL                1
#define MQS_NOOP                2

/*----------------- RESOURCE MANAGEMENT -------------------------------------*/

/* optimized for x86_64 */
struct message_queue {
        union gru_mesqhead      head __gru_cacheline_aligned__; /* CL 0 */
        int                     qlines;                         /* DW 1 */
        long                    hstatus[2];
        void                    *next __gru_cacheline_aligned__;/* CL 1 */
        void                    *limit;
        void                    *start;
        void                    *start2;
        char                    data ____cacheline_aligned;     /* CL 2 */
};

/* First word in every message - used by mesq interface */
struct message_header {
        char    present;
        char    present2;
        char    lines;
        char    fill;
};

#define HSTATUS(mq, h)  ((mq) + offsetof(struct message_queue, hstatus[h]))
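
/*
 * Layout summary (from the CL/DW annotations above): cache line 0 holds
 * the queue head that senders update atomically, plus qlines and the two
 * hstatus words that track the halves of the queue; cache line 1 holds
 * the receiver's cursor (next) and the section pointers; the message
 * lines themselves begin at cache line 2.
 */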

static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
        struct gru_blade_state *bs;
        int lcpu;

        BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
        preempt_disable();
        bs = gru_base[uv_numa_blade_id()];
        lcpu = uv_blade_processor_id();
        *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
        *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
        return 0;
}

static void gru_free_cpu_resources(void *cb, void *dsr)
{
        preempt_enable();
}
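
/*
 * Every kernel GRU operation is bracketed by the pair above. The cb/dsr
 * pointers are per-cpu, so the caller must stay on one cpu from
 * gru_get_cpu_resources() until gru_free_cpu_resources(); the
 * preempt_disable()/preempt_enable() pair guarantees that.
 */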

int gru_get_cb_exception_detail(void *cb,
                struct control_block_extended_exc_detail *excdet)
{
        struct gru_control_block_extended *cbe;

        cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
        prefetchw(cbe);         /* Harmless on hardware, required for emulator */
        excdet->opc = cbe->opccpy;
        excdet->exopc = cbe->exopccpy;
        excdet->ecause = cbe->ecause;
        excdet->exceptdet0 = cbe->idef1upd;
        excdet->exceptdet1 = cbe->idef3upd;
        return 0;
}

char *gru_get_cb_exception_detail_str(int ret, void *cb,
                                      char *buf, int size)
{
        struct gru_control_block_status *gen = (void *)cb;
        struct control_block_extended_exc_detail excdet;

        if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
                gru_get_cb_exception_detail(cb, &excdet);
                snprintf(buf, size,
                        "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x, "
                        "excdet0 0x%lx, excdet1 0x%x",
                        gen, excdet.opc, excdet.exopc, excdet.ecause,
                        excdet.exceptdet0, excdet.exceptdet1);
        } else {
                snprintf(buf, size, "No exception");
        }
        return buf;
}

static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
{
        while (gen->istatus >= CBS_ACTIVE) {
                cpu_relax();
                barrier();
        }
        return gen->istatus;
}

static int gru_retry_exception(void *cb)
{
        struct gru_control_block_status *gen = (void *)cb;
        struct control_block_extended_exc_detail excdet;
        int retry = EXCEPTION_RETRY_LIMIT;

        while (1) {
                if (gru_get_cb_message_queue_substatus(cb))
                        break;
                if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
                        return CBS_IDLE;

                gru_get_cb_exception_detail(cb, &excdet);
                if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
                        break;
                if (retry-- == 0)
                        break;
                gen->icmd = 1;
                gru_flush_cache(gen);
        }
        return CBS_EXCEPTION;
}

int gru_check_status_proc(void *cb)
{
        struct gru_control_block_status *gen = (void *)cb;
        int ret;

        ret = gen->istatus;
        if (ret != CBS_EXCEPTION)
                return ret;
        return gru_retry_exception(cb);
}

int gru_wait_proc(void *cb)
{
        struct gru_control_block_status *gen = (void *)cb;
        int ret;

        ret = gru_wait_idle_or_exception(gen);
        if (ret == CBS_EXCEPTION)
                ret = gru_retry_exception(cb);

        return ret;
}

void gru_abort(int ret, void *cb, char *str)
{
        char buf[GRU_EXC_STR_SIZE];

        panic("GRU FATAL ERROR: %s - %s\n", str,
              gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
}

void gru_wait_abort_proc(void *cb)
{
        int ret;

        ret = gru_wait_proc(cb);
        if (ret)
                gru_abort(ret, cb, "gru_wait_abort");
}

/*------------------------------ MESSAGE QUEUES -----------------------------*/

/* Internal status. These are NOT returned to the user. */
#define MQIE_AGAIN              -1      /* try again */

/*
 * Save/restore the "present" flag that is in the second line of 2-line
 * messages
 */
static inline int get_present2(void *p)
{
        struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
        return mhdr->present;
}

static inline void restore_present2(void *p, int val)
{
        struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
        mhdr->present = val;
}

/*
 * Create a message queue.
 *      qlines - message queue size in cache lines. Includes 2-line header.
 */
int gru_create_message_queue(struct gru_message_queue_desc *mqd,
                void *p, unsigned int bytes, int nasid, int vector, int apicid)
{
        struct message_queue *mq = p;
        unsigned int qlines;

        qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
        memset(mq, 0, bytes);
        mq->start = &mq->data;
        mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
        mq->next = &mq->data;
        mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
        mq->qlines = qlines;
        mq->hstatus[0] = 0;
        mq->hstatus[1] = 1;
        mq->head = gru_mesq_head(2, qlines / 2 + 1);
        mqd->mq = mq;
        mqd->mq_gpa = uv_gpa(mq);
        mqd->qlines = qlines;
        mqd->interrupt_pnode = UV_NASID_TO_PNODE(nasid);
        mqd->interrupt_vector = vector;
        mqd->interrupt_apicid = apicid;
        return 0;
}
EXPORT_SYMBOL_GPL(gru_create_message_queue);
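
/*
 * Illustrative call (sketch only; the buffer size and the interrupt
 * triple are hypothetical, not part of this driver). An 8KB buffer
 * provides 128 cache lines for the 2-line header plus message space.
 *
 *      static struct gru_message_queue_desc mqd;
 *      void *buf = kzalloc(8192, GFP_KERNEL);
 *
 *      if (!buf)
 *              return -ENOMEM;
 *      gru_create_message_queue(&mqd, buf, 8192, nasid, vector, apicid);
 */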

/*
 * Send a NOOP message to a message queue
 *      Returns:
 *               0 - if queue is full after the send. This is the normal case
 *                   but various races can change this.
 *              -1 - if mesq sent successfully but queue not full
 *              >0 - unexpected error. MQE_xxx returned
 */
static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
                                void *mesg)
{
        const struct message_header noop_header = {
                                        .present = MQS_NOOP, .lines = 1};
        unsigned long m;
        int substatus, ret;
        struct message_header save_mhdr, *mhdr = mesg;

        STAT(mesq_noop);
        save_mhdr = *mhdr;
        *mhdr = noop_header;
        gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
        ret = gru_wait(cb);

        if (ret) {
                substatus = gru_get_cb_message_queue_substatus(cb);
                switch (substatus) {
                case CBSS_NO_ERROR:
                        STAT(mesq_noop_unexpected_error);
                        ret = MQE_UNEXPECTED_CB_ERR;
                        break;
                case CBSS_LB_OVERFLOWED:
                        STAT(mesq_noop_lb_overflow);
                        ret = MQE_CONGESTION;
                        break;
                case CBSS_QLIMIT_REACHED:
                        STAT(mesq_noop_qlimit_reached);
                        ret = 0;        /* queue full - the expected case */
                        break;
                case CBSS_AMO_NACKED:
                        STAT(mesq_noop_amo_nacked);
                        ret = MQE_CONGESTION;
                        break;
                case CBSS_PUT_NACKED:
                        STAT(mesq_noop_put_nacked);
                        m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
                        gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
                                                IMA);
                        if (gru_wait(cb) == CBS_IDLE)
                                ret = MQIE_AGAIN;
                        else
                                ret = MQE_UNEXPECTED_CB_ERR;
                        break;
                case CBSS_PAGE_OVERFLOW:
                default:
                        BUG();
                }
        }
        *mhdr = save_mhdr;
        return ret;
}

/*
 * Handle a gru_mesq full.
 */
static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
                                void *mesg, int lines)
{
        union gru_mesqhead mqh;
        unsigned int limit, head;
        unsigned long avalue;
        int half, qlines;

        /* Determine if switching to first/second half of q */
        avalue = gru_get_amo_value(cb);
        head = gru_get_amo_value_head(cb);
        limit = gru_get_amo_value_limit(cb);

        qlines = mqd->qlines;
        half = (limit != qlines);

        if (half)
                mqh = gru_mesq_head(qlines / 2 + 1, qlines);
        else
                mqh = gru_mesq_head(2, qlines / 2 + 1);

        /* Try to get lock for switching head pointer */
        gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
        if (gru_wait(cb) != CBS_IDLE)
                goto cberr;
        if (!gru_get_amo_value(cb)) {
                STAT(mesq_qf_locked);
                return MQE_QUEUE_FULL;
        }

        /* Got the lock. Send optional NOOP if queue is not full. */
        if (head != limit) {
                if (send_noop_message(cb, mqd, mesg)) {
                        gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
                                        XTYPE_DW, IMA);
                        if (gru_wait(cb) != CBS_IDLE)
                                goto cberr;
                        STAT(mesq_qf_noop_not_full);
                        return MQIE_AGAIN;
                }
                avalue++;
        }

        /* Then flip queuehead to other half of queue. */
        gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
                                                        IMA);
        if (gru_wait(cb) != CBS_IDLE)
                goto cberr;

        /* If the swap of the queue head failed, clear the hstatus lock */
        if (gru_get_amo_value(cb) != avalue) {
                STAT(mesq_qf_switch_head_failed);
                gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
                                IMA);
                if (gru_wait(cb) != CBS_IDLE)
                        goto cberr;
        }
        return MQIE_AGAIN;
cberr:
        STAT(mesq_qf_unexpected_error);
        return MQE_UNEXPECTED_CB_ERR;
}
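
/*
 * Note on the path above: the queue is managed as two halves, each with
 * its own hstatus word in the header. hstatus acts both as a "receiver
 * has drained this half" flag and, via the gamir clear/increment pair,
 * as a lock around the head switch. Any unexpected CB error funnels to
 * cberr so the caller sees MQE_UNEXPECTED_CB_ERR rather than a
 * half-completed switch.
 */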

/*
 * Send a cross-partition interrupt to the SSI that contains the target
 * message queue. Normally, the interrupt is automatically delivered by hardware
 * but some error conditions require explicit delivery.
 */
static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd)
{
        if (mqd->interrupt_vector)
                uv_hub_send_ipi(mqd->interrupt_pnode, mqd->interrupt_apicid,
                                mqd->interrupt_vector);
}

/*
 * Handle a gru_mesq failure. Some of these failures are software recoverable
 * or retryable.
 */
static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
                                void *mesg, int lines)
{
        int substatus, ret = 0;
        unsigned long m;

        substatus = gru_get_cb_message_queue_substatus(cb);
        switch (substatus) {
        case CBSS_NO_ERROR:
                STAT(mesq_send_unexpected_error);
                ret = MQE_UNEXPECTED_CB_ERR;
                break;
        case CBSS_LB_OVERFLOWED:
                STAT(mesq_send_lb_overflow);
                ret = MQE_CONGESTION;
                break;
        case CBSS_QLIMIT_REACHED:
                STAT(mesq_send_qlimit_reached);
                ret = send_message_queue_full(cb, mqd, mesg, lines);
                break;
        case CBSS_AMO_NACKED:
                STAT(mesq_send_amo_nacked);
                ret = MQE_CONGESTION;
                break;
        case CBSS_PUT_NACKED:
                STAT(mesq_send_put_nacked);
                m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
                gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
                if (gru_wait(cb) == CBS_IDLE) {
                        ret = MQE_OK;
                        send_message_queue_interrupt(mqd);
                } else {
                        ret = MQE_UNEXPECTED_CB_ERR;
                }
                break;
        default:
                BUG();
        }
        return ret;
}

/*
 * Send a message to a message queue
 *      mqd     message queue descriptor
 *      mesg    message. Must be vaddr within a GSEG
 *      bytes   message size (<= 2 CL)
 */
int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
                                unsigned int bytes)
{
        struct message_header *mhdr;
        void *cb;
        void *dsr;
        int istatus, clines, ret;

        STAT(mesq_send);
        BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);

        clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
        if (gru_get_cpu_resources(bytes, &cb, &dsr))
                return MQE_BUG_NO_RESOURCES;
        memcpy(dsr, mesg, bytes);
        mhdr = dsr;
        mhdr->present = MQS_FULL;
        mhdr->lines = clines;
        if (clines == 2) {
                mhdr->present2 = get_present2(mhdr);
                restore_present2(mhdr, MQS_FULL);
        }

        do {
                ret = MQE_OK;
                gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
                istatus = gru_wait(cb);
                if (istatus != CBS_IDLE)
                        ret = send_message_failure(cb, mqd, dsr, clines);
        } while (ret == MQIE_AGAIN);
        gru_free_cpu_resources(cb, dsr);

        if (ret)
                STAT(mesq_send_failed);
        return ret;
}
EXPORT_SYMBOL_GPL(gru_send_message_gpa);
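
/*
 * Illustrative send (sketch only; the message layout is hypothetical).
 * The first word of the message is overwritten with the message_header,
 * so real payload must start after it; the driver copies the message
 * into a kernel DSR before issuing the mesq (see the memcpy above).
 *
 *      struct my_msg {
 *              struct message_header hdr;
 *              int payload;
 *      };
 *      struct my_msg msg = { .payload = 42 };
 *
 *      ret = gru_send_message_gpa(&mqd, &msg, sizeof(msg));
 *      if (ret == MQE_QUEUE_FULL)
 *              (retry later)
 */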

/*
 * Advance the receive pointer for the queue to the next message.
 */
void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
{
        struct message_queue *mq = mqd->mq;
        struct message_header *mhdr = mq->next;
        void *next, *pnext;
        int half = -1;
        int lines = mhdr->lines;

        if (lines == 2)
                restore_present2(mhdr, MQS_EMPTY);
        mhdr->present = MQS_EMPTY;

        pnext = mq->next;
        next = pnext + GRU_CACHE_LINE_BYTES * lines;
        if (next == mq->limit) {
                next = mq->start;
                half = 1;
        } else if (pnext < mq->start2 && next >= mq->start2) {
                half = 0;
        }

        if (half >= 0)
                mq->hstatus[half] = 1;
        mq->next = next;
}
EXPORT_SYMBOL_GPL(gru_free_message);

/*
 * Get next message from message queue. Return NULL if no message
 * present. User must call gru_free_message() to move to the next
 * message.
 *      mqd     message queue descriptor
 */
void *gru_get_next_message(struct gru_message_queue_desc *mqd)
{
        struct message_queue *mq = mqd->mq;
        struct message_header *mhdr = mq->next;
        int present = mhdr->present;

        /* skip NOOP messages */
        STAT(mesq_receive);
        while (present == MQS_NOOP) {
                gru_free_message(mqd, mhdr);
                mhdr = mq->next;
                present = mhdr->present;
        }

        /* Wait for both halves of 2 line messages */
        if (present == MQS_FULL && mhdr->lines == 2 &&
                                get_present2(mhdr) == MQS_EMPTY)
                present = MQS_EMPTY;

        if (!present) {
                STAT(mesq_receive_none);
                return NULL;
        }

        if (mhdr->lines == 2)
                restore_present2(mhdr, mhdr->present2);

        return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);
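
/*
 * Illustrative receive loop (sketch only; runs on the side that owns the
 * queue memory, e.g. driven by the interrupt vector registered in
 * gru_create_message_queue()):
 *
 *      void *msg;
 *
 *      while ((msg = gru_get_next_message(&mqd)) != NULL) {
 *              (process message, skipping the header word)
 *              gru_free_message(&mqd, msg);
 *      }
 */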

/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/

/*
 * Copy a block of data using the GRU resources
 */
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
                                unsigned int bytes)
{
        void *cb;
        void *dsr;
        int ret;

        STAT(copy_gpa);
        if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
                return MQE_BUG_NO_RESOURCES;
        gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
                  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
        ret = gru_wait(cb);
        gru_free_cpu_resources(cb, dsr);
        return ret;
}
EXPORT_SYMBOL_GPL(gru_copy_gpa);
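
/*
 * Illustrative copy (sketch only; src, dst and len are hypothetical).
 * Both addresses are global physical addresses, typically obtained with
 * uv_gpa(); a nonzero return indicates an unrecovered GRU error.
 *
 *      ret = gru_copy_gpa(uv_gpa(dst), uv_gpa(src), len);
 */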

/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */
static __cacheline_aligned unsigned long word0;
static __cacheline_aligned unsigned long word1;

static int quicktest(struct gru_state *gru)
{
        void *cb;
        void *ds;

        cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
        ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
        word0 = MAGIC;

        /* DSR <- word0, then word1 <- DSR; both must end up equal to MAGIC */
        gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
        if (gru_wait(cb) != CBS_IDLE)
                BUG();

        if (*(unsigned long *)ds != MAGIC)
                BUG();
        gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
        if (gru_wait(cb) != CBS_IDLE)
                BUG();

        if (word0 != word1 || word0 != MAGIC) {
                printk("GRU quicktest err: gid %d, found 0x%lx, expected 0x%lx\n",
                       gru->gs_gid, word1, MAGIC);
                BUG();          /* ZZZ should not be fatal */
        }

        return 0;
}

int gru_kservices_init(struct gru_state *gru)
{
        struct gru_blade_state *bs;
        struct gru_context_configuration_handle *cch;
        unsigned long cbr_map, dsr_map;
        int err, num, cpus_possible;

        /*
         * Currently, resources are reserved ONLY on the second chiplet
         * on each blade. This leaves ALL resources on chiplet 0 available
         * for user contexts.
         */
        bs = gru->gs_blade;
        if (gru != &bs->bs_grus[1])
                return 0;

        cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);

        num = GRU_NUM_KERNEL_CBR * cpus_possible;
        cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
        gru->gs_reserved_cbrs += num;

        num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
        dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
        gru->gs_reserved_dsr_bytes += num;

        gru->gs_active_contexts++;
        __set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
        cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);

        bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
                                        KERNEL_CTXNUM, 0);
        bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
                                        KERNEL_CTXNUM, 0);

        lock_cch_handle(cch);
        cch->tfm_fault_bit_enable = 0;
        cch->tlb_int_enable = 0;
        cch->tfm_done_bit_enable = 0;
        cch->unmap_enable = 1;
        err = cch_allocate(cch, 0, 0, cbr_map, dsr_map);
        if (err) {
                gru_dbg(grudev,
                        "Unable to allocate kernel CCH: gid %d, err %d\n",
                        gru->gs_gid, err);
                BUG();
        }
        err = cch_start(cch);
        if (err) {
                gru_dbg(grudev, "Unable to start kernel CCH: gid %d, err %d\n",
                        gru->gs_gid, err);
                BUG();
        }
        unlock_cch_handle(cch);

        if (gru_options & GRU_QUICKLOOK)
                quicktest(gru);
        return 0;
}

void gru_kservices_exit(struct gru_state *gru)
{
        struct gru_context_configuration_handle *cch;
        struct gru_blade_state *bs;

        bs = gru->gs_blade;
        if (gru != &bs->bs_grus[1])
                return;

        cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
        lock_cch_handle(cch);
        if (cch_interrupt_sync(cch))
                BUG();
        if (cch_deallocate(cch))
                BUG();
        unlock_cch_handle(cch);
}