/*
 * SN Platform GRU Driver
 *
 *              KERNEL SERVICES THAT USE THE GRU
 *
 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>
/*
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * At boot time, the kernel permanently reserves a fixed number of
 * CBRs/DSRs for each cpu to use. The resources are all taken from
 * the GRU chiplet 1 on the blade. This leaves the full set of resources
 * of chiplet 0 available to be allocated to a single user.
 */
/* Blade percpu resources PERMANENTLY reserved for kernel use */
#define GRU_NUM_KERNEL_CBR	1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define KERNEL_CTXNUM		15

/* GRU instruction attributes for all instructions */
#define IMA			IMA_CB_DELAY

/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
#define __gru_cacheline_aligned__				\
	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))

#define MAGIC	0x1234567887654321UL

/* Default retry count for GRU errors on kernel instructions */
#define EXCEPTION_RETRY_LIMIT	3
/* Status of message queue sections */
#define MQS_EMPTY		0
#define MQS_FULL		1
#define MQS_NOOP		2
/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
/* optimized for x86_64 */
struct message_queue {
	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
	int			qlines;				/* DW 1 */
	long			hstatus[2];
	void			*next __gru_cacheline_aligned__;/* CL 1 */
	void			*limit;
	void			*start;
	void			*start2;
	char			data ____cacheline_aligned;	/* CL 2 */
};
/* First word in every message - used by mesq interface */
struct message_header {
	char	present;
	char	present2;
	char	lines;
	char	fill;
};
#define QLINES(mq)	((mq) + offsetof(struct message_queue, qlines))
#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))
static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
	struct gru_blade_state *bs;
	int lcpu;

	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
	preempt_disable();
	bs = gru_base[uv_numa_blade_id()];
	lcpu = uv_blade_processor_id();
	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
	return 0;
}

static void gru_free_cpu_resources(void *cb, void *dsr)
{
	preempt_enable();
}
int gru_get_cb_exception_detail(void *cb,
		struct control_block_extended_exc_detail *excdet)
{
	struct gru_control_block_extended *cbe;

	cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
	prefetchw(cbe);	/* Harmless on hardware, required for emulator */
	excdet->opc = cbe->opccpy;
	excdet->exopc = cbe->exopccpy;
	excdet->ecause = cbe->ecause;
	excdet->exceptdet0 = cbe->idef1upd;
	excdet->exceptdet1 = cbe->idef3upd;
	return 0;
}
char *gru_get_cb_exception_detail_str(int ret, void *cb,
				      char *buf, int size)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;

	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
		gru_get_cb_exception_detail(cb, &excdet);
		snprintf(buf, size,
			"GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
			"excdet0 0x%lx, excdet1 0x%x",
			gen, excdet.opc, excdet.exopc, excdet.ecause,
			excdet.exceptdet0, excdet.exceptdet1);
	} else {
		snprintf(buf, size, "No exception");
	}

	return buf;
}
static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
{
	while (gen->istatus >= CBS_ACTIVE) {
		cpu_relax();
		barrier();
	}
	return gen->istatus;
}
static int gru_retry_exception(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;
	int retry = EXCEPTION_RETRY_LIMIT;

	while (1) {
		if (gru_get_cb_message_queue_substatus(cb))
			break;
		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
			return CBS_IDLE;

		gru_get_cb_exception_detail(cb, &excdet);
		if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
			break;
		if (retry-- == 0)
			break;
		gen->icmd = 1;
		gru_flush_cache(gen);
	}
	return CBS_EXCEPTION;
}
int gru_check_status_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gen->istatus;
	if (ret != CBS_EXCEPTION)
		return ret;
	return gru_retry_exception(cb);
}
int gru_wait_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gru_wait_idle_or_exception(gen);
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);

	return ret;
}
void gru_abort(int ret, void *cb, char *str)
{
	char buf[GRU_EXC_STR_SIZE];

	panic("GRU FATAL ERROR: %s - %s\n", str,
	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
}
void gru_wait_abort_proc(void *cb)
{
	int ret;

	ret = gru_wait_proc(cb);
	if (ret)
		gru_abort(ret, cb, "gru_wait_abort");
}
/*------------------------------ MESSAGE QUEUES -----------------------------*/

/* Internal status. These are NOT returned to the user. */
#define MQIE_AGAIN		-1	/* try again */
/*
 * Save/restore the "present" flag that is in the second line of 2-line
 * messages
 */
static inline int get_present2(void *p)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	return mhdr->present;
}
static inline void restore_present2(void *p, int val)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	mhdr->present = val;
}
/*
 * Create a message queue.
 *	qlines - message queue size in cache lines. Includes 2-line header.
 */
int gru_create_message_queue(void *p, unsigned int bytes)
{
	struct message_queue *mq = p;
	unsigned int qlines;

	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
	memset(mq, 0, bytes);
	mq->start = &mq->data;
	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
	mq->next = &mq->data;
	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
	mq->qlines = qlines;
	mq->hstatus[0] = 0;
	mq->hstatus[1] = 1;
	mq->head = gru_mesq_head(2, qlines / 2 + 1);
	return 0;
}
EXPORT_SYMBOL_GPL(gru_create_message_queue);
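/*
 * Example of setting up a queue (illustrative sketch only, not used by this
 * driver; assumes the caller owns a physically contiguous, cacheline aligned
 * buffer of "qsize" bytes):
 *
 *	void *qmem = kzalloc(qsize, GFP_KERNEL);
 *	unsigned long mq_gpa;
 *
 *	if (!qmem)
 *		return -ENOMEM;
 *	gru_create_message_queue(qmem, qsize);
 *	mq_gpa = uv_gpa(qmem);
 *
 * Senders address the queue by the global physical address "mq_gpa"; the
 * receiver polls it through the kernel vaddr "qmem".
 */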
/*
 * Send a NOOP message to a message queue
 *	Returns:
 *		 0 - if queue is full after the send. This is the normal case
 *		     but various races can change this.
 *		-1 - if mesq sent successfully but queue not full
 *		>0 - unexpected error. MQE_xxx returned
 */
static int send_noop_message(void *cb,
				unsigned long mq, void *mesg)
{
	const struct message_header noop_header = {
					.present = MQS_NOOP, .lines = 1};
	unsigned long m;
	int substatus, ret;
	struct message_header save_mhdr, *mhdr = mesg;

	STAT(mesq_noop);
	save_mhdr = *mhdr;
	*mhdr = noop_header;
	gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA);
	ret = gru_wait(cb);

	if (ret) {
		substatus = gru_get_cb_message_queue_substatus(cb);
		switch (substatus) {
		case CBSS_NO_ERROR:
			STAT(mesq_noop_unexpected_error);
			ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_LB_OVERFLOWED:
			STAT(mesq_noop_lb_overflow);
			ret = MQE_CONGESTION;
			break;
		case CBSS_QLIMIT_REACHED:
			STAT(mesq_noop_qlimit_reached);
			ret = 0;
			break;
		case CBSS_AMO_NACKED:
			STAT(mesq_noop_amo_nacked);
			ret = MQE_CONGESTION;
			break;
		case CBSS_PUT_NACKED:
			STAT(mesq_noop_put_nacked);
			m = mq + (gru_get_amo_value_head(cb) << 6);
			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
						IMA);
			if (gru_wait(cb) == CBS_IDLE)
				ret = MQIE_AGAIN;
			else
				ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_PAGE_OVERFLOW:
		default:
			BUG();
		}
	}
	*mhdr = save_mhdr;
	return ret;
}
/*
 * Handle a gru_mesq full.
 */
static int send_message_queue_full(void *cb,
			   unsigned long mq, void *mesg, int lines)
{
	union gru_mesqhead mqh;
	unsigned int limit, head;
	unsigned long avalue;
	int half, qlines, save;

	/* Determine if switching to first/second half of q */
	avalue = gru_get_amo_value(cb);
	head = gru_get_amo_value_head(cb);
	limit = gru_get_amo_value_limit(cb);

	/*
	 * Fetch "qlines" from the queue header. Since the queue may be
	 * in memory that can't be accessed using socket addresses, use
	 * the GRU to access the data. Use DSR space from the message.
	 */
	save = *(int *)mesg;
	gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;
	qlines = *(int *)mesg;
	*(int *)mesg = save;
	half = (limit != qlines);

	if (half)
		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
	else
		mqh = gru_mesq_head(2, qlines / 2 + 1);

	/* Try to get lock for switching head pointer */
	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;
	if (!gru_get_amo_value(cb)) {
		STAT(mesq_qf_locked);
		return MQE_QUEUE_FULL;
	}

	/* Got the lock. Send optional NOP if queue not full, */
	if (head != limit) {
		if (send_noop_message(cb, mq, mesg)) {
			gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half),
					XTYPE_DW, IMA);
			if (gru_wait(cb) != CBS_IDLE)
				goto cberr;
			STAT(mesq_qf_noop_not_full);
			return MQIE_AGAIN;
		}
		avalue++;
	}

	/* Then flip queuehead to other half of queue. */
	gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;

	/* If not successfully in swapping queue head, clear the hstatus lock */
	if (gru_get_amo_value(cb) != avalue) {
		STAT(mesq_qf_switch_head_failed);
		gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA);
		if (gru_wait(cb) != CBS_IDLE)
			goto cberr;
	}
	return MQIE_AGAIN;
cberr:
	STAT(mesq_qf_unexpected_error);
	return MQE_UNEXPECTED_CB_ERR;
}
/*
 * Handle a gru_mesq failure. Some of these failures are software recoverable
 * or retryable.
 */
static int send_message_failure(void *cb,
				unsigned long mq,
				void *mesg,
				int lines)
{
	int substatus, ret = 0;
	unsigned long m;

	substatus = gru_get_cb_message_queue_substatus(cb);
	switch (substatus) {
	case CBSS_NO_ERROR:
		STAT(mesq_send_unexpected_error);
		ret = MQE_UNEXPECTED_CB_ERR;
		break;
	case CBSS_LB_OVERFLOWED:
		STAT(mesq_send_lb_overflow);
		ret = MQE_CONGESTION;
		break;
	case CBSS_QLIMIT_REACHED:
		STAT(mesq_send_qlimit_reached);
		ret = send_message_queue_full(cb, mq, mesg, lines);
		break;
	case CBSS_AMO_NACKED:
		STAT(mesq_send_amo_nacked);
		ret = MQE_CONGESTION;
		break;
	case CBSS_PUT_NACKED:
		STAT(mesq_send_put_nacked);
		m = mq + (gru_get_amo_value_head(cb) << 6);
		gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
		if (gru_wait(cb) == CBS_IDLE)
			ret = MQE_OK;
		else
			ret = MQE_UNEXPECTED_CB_ERR;
		break;
	default:
		BUG();
	}
	return ret;
}
/*
 * Send a message to a message queue
 *	cb	GRU control block to use to send message
 *	mq	message queue
 *	mesg	message. must be vaddr within a GSEG
 *	bytes	message size (<= 2 CL)
 */
int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes)
{
	struct message_header *mhdr;
	void *cb;
	void *dsr;
	int istatus, clines, ret;

	STAT(mesq_send);
	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);

	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
	if (gru_get_cpu_resources(bytes, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	memcpy(dsr, mesg, bytes);
	mhdr = dsr;
	mhdr->present = MQS_FULL;
	mhdr->lines = clines;
	if (clines == 2) {
		mhdr->present2 = get_present2(mhdr);
		restore_present2(mhdr, MQS_FULL);
	}

	do {
		ret = MQE_OK;
		gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA);
		istatus = gru_wait(cb);
		if (istatus != CBS_IDLE)
			ret = send_message_failure(cb, mq, dsr, clines);
	} while (ret == MQIE_AGAIN);
	gru_free_cpu_resources(cb, dsr);

	if (ret)
		STAT(mesq_send_failed);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_send_message_gpa);
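/*
 * Example of sending (illustrative sketch; "mq_gpa" is the queue gpa from
 * the creation example above, "msg" points to a 1 or 2 cacheline buffer
 * whose first word is reserved for the message_header):
 *
 *	int ret = gru_send_message_gpa(mq_gpa, msg, msg_bytes);
 *
 *	if (ret == MQE_QUEUE_FULL || ret == MQE_CONGESTION)
 *		back off and retry the send later;
 *	else if (ret != MQE_OK)
 *		treat it as a hard MQE_xxx error;
 */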
/*
 * Advance the receive pointer for the queue to the next message.
 */
void gru_free_message(void *rmq, void *mesg)
{
	struct message_queue *mq = rmq;
	struct message_header *mhdr = mq->next;
	void *next, *pnext;
	int half = -1;
	int lines = mhdr->lines;

	if (lines == 2)
		restore_present2(mhdr, MQS_EMPTY);
	mhdr->present = MQS_EMPTY;

	pnext = mq->next;
	next = pnext + GRU_CACHE_LINE_BYTES * lines;
	if (next == mq->limit) {
		next = mq->start;
		half = 1;
	} else if (pnext < mq->start2 && next >= mq->start2) {
		half = 0;
	}

	if (half >= 0)
		mq->hstatus[half] = 1;
	mq->next = next;
}
EXPORT_SYMBOL_GPL(gru_free_message);
/*
 * Get next message from message queue. Return NULL if no message
 * present. User must call next_message() to move to next message.
 *	rmq	message queue
 */
void *gru_get_next_message(void *rmq)
{
	struct message_queue *mq = rmq;
	struct message_header *mhdr = mq->next;
	int present = mhdr->present;

	/* skip NOOP messages */
	STAT(mesq_receive);
	while (present == MQS_NOOP) {
		gru_free_message(rmq, mhdr);
		mhdr = mq->next;
		present = mhdr->present;
	}

	/* Wait for both halves of 2 line messages */
	if (present == MQS_FULL && mhdr->lines == 2 &&
				get_present2(mhdr) == MQS_EMPTY)
		present = MQS_EMPTY;

	if (!present) {
		STAT(mesq_receive_none);
		return NULL;
	}

	if (mhdr->lines == 2)
		restore_present2(mhdr, mhdr->present2);

	return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);
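/*
 * Example receive loop (illustrative sketch; "qmem" is the kernel vaddr of
 * the queue set up in the creation example above):
 *
 *	void *msg;
 *
 *	while ((msg = gru_get_next_message(qmem)) != NULL) {
 *		consume the 1 or 2 cacheline message at "msg", then
 *		gru_free_message(qmem, msg);
 *	}
 */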
/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/

/*
 * Copy a block of data using the GRU resources
 */
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
				unsigned int bytes)
{
	void *cb;
	void *dsr;
	int ret;

	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA);
	ret = gru_wait(cb);
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_copy_gpa);
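/*
 * Example (illustrative sketch; both buffers are assumed physically
 * contiguous and are named by their global physical addresses):
 *
 *	if (gru_copy_gpa(uv_gpa(dst), uv_gpa(src), nbytes))
 *		fall back to a cpu copy or report the error;
 */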
/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */
static __cacheline_aligned unsigned long word0;
static __cacheline_aligned unsigned long word1;
static int quicktest(struct gru_state *gru)
{
	void *cb;
	void *ds;

	cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
	ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
	word0 = MAGIC;

	gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		BUG();

	if (*(unsigned long *)ds != MAGIC)
		BUG();
	gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		BUG();

	if (word0 != word1 || word0 != MAGIC) {
		printk(KERN_DEBUG
		       "GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n",
		       gru->gs_gid, word1, MAGIC);
		BUG();		/* ZZZ should not be fatal */
	}

	return 0;
}
int gru_kservices_init(struct gru_state *gru)
{
	struct gru_blade_state *bs;
	struct gru_context_configuration_handle *cch;
	unsigned long cbr_map, dsr_map;
	int err, num, cpus_possible;

	/*
	 * Currently, resources are reserved ONLY on the second chiplet
	 * on each blade. This leaves ALL resources on chiplet 0 available
	 * for user mode.
	 */
	bs = gru->gs_blade;
	if (gru != &bs->bs_grus[1])
		return 0;

	cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);

	num = GRU_NUM_KERNEL_CBR * cpus_possible;
	cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
	gru->gs_reserved_cbrs += num;

	num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
	dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
	gru->gs_reserved_dsr_bytes += num;

	gru->gs_active_contexts++;
	__set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
	cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);

	bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
					KERNEL_CTXNUM, 0);
	bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
					KERNEL_CTXNUM, 0);

	lock_cch_handle(cch);
	cch->tfm_fault_bit_enable = 0;
	cch->tlb_int_enable = 0;
	cch->tfm_done_bit_enable = 0;
	cch->unmap_enable = 1;
	err = cch_allocate(cch, 0, cbr_map, dsr_map);
	if (err) {
		gru_dbg(grudev,
			"Unable to allocate kernel CCH: gru %d, err %d\n",
			gru->gs_gid, err);
		BUG();
	}
	if (cch_start(cch)) {
		gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n",
			gru->gs_gid, err);
		BUG();
	}
	unlock_cch_handle(cch);

	if (gru_options & GRU_QUICKLOOK)
		quicktest(gru);
	return 0;
}
)