drivers/misc/sgi-gru/grukservices.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * SN Platform GRU Driver
   4  *
   5  *              KERNEL SERVICES THAT USE THE GRU
   6  *
   7  *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
   8  */
   9
  10 #include <linux/kernel.h>
  11 #include <linux/errno.h>
  12 #include <linux/slab.h>
  13 #include <linux/mm.h>
  14 #include <linux/spinlock.h>
  15 #include <linux/device.h>
  16 #include <linux/miscdevice.h>
  17 #include <linux/proc_fs.h>
  18 #include <linux/interrupt.h>
  19 #include <linux/sync_core.h>
  20 #include <linux/uaccess.h>
  21 #include <linux/delay.h>
  22 #include <linux/export.h>
  23 #include <asm/io_apic.h>
  24 #include "gru.h"
  25 #include "grulib.h"
  26 #include "grutables.h"
  27 #include "grukservices.h"
  28 #include "gru_instructions.h"
  29 #include <asm/uv/uv_hub.h>
  30
  31 /*
  32  * Kernel GRU Usage
  33  *
  34  * The following is an interim algorithm for management of kernel GRU
  35  * resources. This will likely be replaced when we better understand the
  36  * kernel/user requirements.
  37  *
  38  * Blade percpu resources reserved for kernel use. These resources are
  39  * reserved whenever the kernel context for the blade is loaded. Note
  40  * that the kernel context is not guaranteed to be always available. It is
  41  * loaded on demand & can be stolen by a user if the user demand exceeds the
  42  * kernel demand. The kernel can always reload the kernel context but
  43  * a SLEEP may be required!!!.
  44  *
  45  * Async Overview:
  46  *
  47  *      Each blade has one "kernel context" that owns GRU kernel resources
  48  *      located on the blade. Kernel drivers use GRU resources in this context
  49  *      for sending messages, zeroing memory, etc.
  50  *
  51  *      The kernel context is dynamically loaded on demand. If it is not in
  52  *      use by the kernel, the kernel context can be unloaded & given to a user.
  53  *      The kernel context will be reloaded when needed. This may require that
  54  *      a context be stolen from a user.
  55  *              NOTE: frequent unloading/reloading of the kernel context is
  56  *              expensive. We are depending on batch schedulers, cpusets, sane
  57  *              drivers or some other mechanism to prevent the need for frequent
  58  *              stealing/reloading.
  59  *
  60  *      The kernel context consists of two parts:
  61  *              - 1 CB & a few DSRs that are reserved for each cpu on the blade.
   62  *                Each cpu has its own private resources & does not share them
  63  *                with other cpus. These resources are used serially, ie,
  64  *                locked, used & unlocked  on each call to a function in
  65  *                grukservices.
  66  *                      (Now that we have dynamic loading of kernel contexts, I
  67  *                       may rethink this & allow sharing between cpus....)
  68  *
  69  *              - Additional resources can be reserved long term & used directly
  70  *                by UV drivers located in the kernel. Drivers using these GRU
  71  *                resources can use asynchronous GRU instructions that send
  72  *                interrupts on completion.
  73  *                      - these resources must be explicitly locked/unlocked
  74  *                      - locked resources prevent (obviously) the kernel
  75  *                        context from being unloaded.
   76  *                      - drivers using these resources directly issue their own
  77  *                        GRU instruction and must wait/check completion.
  78  *
  79  *                When these resources are reserved, the caller can optionally
  80  *                associate a wait_queue with the resources and use asynchronous
  81  *                GRU instructions. When an async GRU instruction completes, the
  82  *                driver will do a wakeup on the event.
  83  *
  84  */
  85
  86
  87 #define ASYNC_HAN_TO_BID(h)     ((h) - 1)
  88 #define ASYNC_BID_TO_HAN(b)     ((b) + 1)
  89 #define ASYNC_HAN_TO_BS(h)      gru_base[ASYNC_HAN_TO_BID(h)]
  90
  91 #define GRU_NUM_KERNEL_CBR      1
  92 #define GRU_NUM_KERNEL_DSR_BYTES 256
  93 #define GRU_NUM_KERNEL_DSR_CL   (GRU_NUM_KERNEL_DSR_BYTES /             \
  94                                         GRU_CACHE_LINE_BYTES)
  95
  96 /* GRU instruction attributes for all instructions */
  97 #define IMA                     IMA_CB_DELAY
  98
  99 /* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
 100 #define __gru_cacheline_aligned__                               \
 101         __attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
 102
 103 #define MAGIC   0x1234567887654321UL
 104
 105 /* Default retry count for GRU errors on kernel instructions */
 106 #define EXCEPTION_RETRY_LIMIT   3
 107
 108 /* Status of message queue sections */
 109 #define MQS_EMPTY               0
 110 #define MQS_FULL                1
 111 #define MQS_NOOP                2
 112
 113 /*----------------- RESOURCE MANAGEMENT -------------------------------------*/
 114 /* optimized for x86_64 */
 115 struct message_queue {
 116         union gru_mesqhead      head __gru_cacheline_aligned__; /* CL 0 */
 117         int                     qlines;                         /* DW 1 */
 118         long                    hstatus[2];
 119         void                    *next __gru_cacheline_aligned__;/* CL 1 */
 120         void                    *limit;
 121         void                    *start;
 122         void                    *start2;
 123         char                    data ____cacheline_aligned;     /* CL 2 */
 124 };
 125
 126 /* First word in every message - used by mesq interface */
 127 struct message_header {
 128         char    present;
 129         char    present2;
 130         char    lines;
 131         char    fill;
 132 };
 133
 134 #define HSTATUS(mq, h)  ((mq) + offsetof(struct message_queue, hstatus[h]))
 135
 136 /*
 137  * Reload the blade's kernel context into a GRU chiplet. Called holding
 138  * the bs_kgts_sema for READ. Will steal user contexts if necessary.
 139  */
 140 static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
 141 {
 142         struct gru_state *gru;
 143         struct gru_thread_state *kgts;
 144         void *vaddr;
 145         int ctxnum, ncpus;
 146
 147         up_read(&bs->bs_kgts_sema);
 148         down_write(&bs->bs_kgts_sema);
 149
 150         if (!bs->bs_kgts) {
 151                 do {
 152                         bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
 153                         if (!IS_ERR(bs->bs_kgts))
 154                                 break;
 155                         msleep(1);
 156                 } while (true);
 157                 bs->bs_kgts->ts_user_blade_id = blade_id;
 158         }
 159         kgts = bs->bs_kgts;
 160
 161         if (!kgts->ts_gru) {
 162                 STAT(load_kernel_context);
 163                 ncpus = uv_blade_nr_possible_cpus(blade_id);
 164                 kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
 165                         GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
 166                 kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
 167                         GRU_NUM_KERNEL_DSR_BYTES * ncpus +
 168                                 bs->bs_async_dsr_bytes);
 169                 while (!gru_assign_gru_context(kgts)) {
 170                         msleep(1);
 171                         gru_steal_context(kgts);
 172                 }
 173                 gru_load_context(kgts);
 174                 gru = bs->bs_kgts->ts_gru;
 175                 vaddr = gru->gs_gru_base_vaddr;
 176                 ctxnum = kgts->ts_ctxnum;
 177                 bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
 178                 bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
 179         }
 180         downgrade_write(&bs->bs_kgts_sema);
 181 }
 182
 183 /*
 184  * Free all kernel contexts that are not currently in use.
 185  *   Returns 0 if all freed, else number of inuse context.
 186  */
 187 static int gru_free_kernel_contexts(void)
 188 {
 189         struct gru_blade_state *bs;
 190         struct gru_thread_state *kgts;
 191         int bid, ret = 0;
 192
 193         for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
 194                 bs = gru_base[bid];
 195                 if (!bs)
 196                         continue;
 197
 198                 /* Ignore busy contexts. Don't want to block here.  */
 199                 if (down_write_trylock(&bs->bs_kgts_sema)) {
 200                         kgts = bs->bs_kgts;
 201                         if (kgts && kgts->ts_gru)
 202                                 gru_unload_context(kgts, 0);
 203                         bs->bs_kgts = NULL;
 204                         up_write(&bs->bs_kgts_sema);
 205                         kfree(kgts);
 206                 } else {
 207                         ret++;
 208                 }
 209         }
 210         return ret;
 211 }
 212
 213 /*
 214  * Lock & load the kernel context for the specified blade.
 215  */
 216 static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
 217 {
 218         struct gru_blade_state *bs;
 219         int bid;
 220
 221         STAT(lock_kernel_context);
 222 again:
 223         bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
 224         bs = gru_base[bid];
 225
 226         /* Handle the case where migration occurred while waiting for the sema */
 227         down_read(&bs->bs_kgts_sema);
 228         if (blade_id < 0 && bid != uv_numa_blade_id()) {
 229                 up_read(&bs->bs_kgts_sema);
 230                 goto again;
 231         }
 232         if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
 233                 gru_load_kernel_context(bs, bid);
 234         return bs;
 235
 236 }
 237
 238 /*
 239  * Unlock the kernel context for the specified blade. Context is not
 240  * unloaded but may be stolen before next use.
 241  */
 242 static void gru_unlock_kernel_context(int blade_id)
 243 {
 244         struct gru_blade_state *bs;
 245
 246         bs = gru_base[blade_id];
 247         up_read(&bs->bs_kgts_sema);
 248         STAT(unlock_kernel_context);
 249 }
 250
 251 /*
 252  * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
 253  *      - returns with preemption disabled
 254  */
 255 static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
 256 {
 257         struct gru_blade_state *bs;
 258         int lcpu;
 259
 260         BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
 261         bs = gru_lock_kernel_context(-1);
 262         lcpu = uv_blade_processor_id();
 263         *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
 264         *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
 265         return 0;
 266 }
 267
 268 /*
 269  * Free the current cpus reserved DSR/CBR resources.
 270  */
 271 static void gru_free_cpu_resources(void *cb, void *dsr)
 272 {
 273         gru_unlock_kernel_context(uv_numa_blade_id());
 274 }
 275
 276 /*
 277  * Reserve GRU resources to be used asynchronously.
 278  *   Note: currently supports only 1 reservation per blade.
 279  *
 280  *      input:
 281  *              blade_id  - blade on which resources should be reserved
 282  *              cbrs      - number of CBRs
 283  *              dsr_bytes - number of DSR bytes needed
 284  *      output:
 285  *              handle to identify resource
 286  *              (0 = async resources already reserved)
 287  */
 288 unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
 289                         struct completion *cmp)
 290 {
 291         struct gru_blade_state *bs;
 292         struct gru_thread_state *kgts;
 293         int ret = 0;
 294
 295         bs = gru_base[blade_id];
 296
 297         down_write(&bs->bs_kgts_sema);
 298
 299         /* Verify no resources already reserved */
 300         if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
 301                 goto done;
 302         bs->bs_async_dsr_bytes = dsr_bytes;
 303         bs->bs_async_cbrs = cbrs;
 304         bs->bs_async_wq = cmp;
 305         kgts = bs->bs_kgts;
 306
 307         /* Resources changed. Unload context if already loaded */
 308         if (kgts && kgts->ts_gru)
 309                 gru_unload_context(kgts, 0);
 310         ret = ASYNC_BID_TO_HAN(blade_id);
 311
 312 done:
 313         up_write(&bs->bs_kgts_sema);
 314         return ret;
 315 }
 316
 317 /*
 318  * Release async resources previously reserved.
 319  *
 320  *      input:
 321  *              han - handle to identify resources
 322  */
 323 void gru_release_async_resources(unsigned long han)
 324 {
 325         struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
 326
 327         down_write(&bs->bs_kgts_sema);
 328         bs->bs_async_dsr_bytes = 0;
 329         bs->bs_async_cbrs = 0;
 330         bs->bs_async_wq = NULL;
 331         up_write(&bs->bs_kgts_sema);
 332 }
 333
 334 /*
 335  * Wait for async GRU instructions to complete.
 336  *
 337  *      input:
 338  *              han - handle to identify resources
 339  */
 340 void gru_wait_async_cbr(unsigned long han)
 341 {
 342         struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
 343
 344         wait_for_completion(bs->bs_async_wq);
 345         mb();
 346 }
 347
 348 /*
 349  * Lock previous reserved async GRU resources
 350  *
 351  *      input:
 352  *              han - handle to identify resources
 353  *      output:
 354  *              cb  - pointer to first CBR
 355  *              dsr - pointer to first DSR
 356  */
 357 void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr)
 358 {
 359         struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
 360         int blade_id = ASYNC_HAN_TO_BID(han);
 361         int ncpus;
 362
 363         gru_lock_kernel_context(blade_id);
 364         ncpus = uv_blade_nr_possible_cpus(blade_id);
 365         if (cb)
 366                 *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
 367         if (dsr)
 368                 *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
 369 }
 370
 371 /*
 372  * Unlock previous reserved async GRU resources
 373  *
 374  *      input:
 375  *              han - handle to identify resources
 376  */
 377 void gru_unlock_async_resource(unsigned long han)
 378 {
 379         int blade_id = ASYNC_HAN_TO_BID(han);
 380
 381         gru_unlock_kernel_context(blade_id);
 382 }
 383
 384 /*----------------------------------------------------------------------*/
 385 int gru_get_cb_exception_detail(void *cb,
 386                 struct control_block_extended_exc_detail *excdet)
 387 {
 388         struct gru_control_block_extended *cbe;
 389         struct gru_thread_state *kgts = NULL;
 390         unsigned long off;
 391         int cbrnum, bid;
 392
 393         /*
 394          * Locate kgts for cb. This algorithm is SLOW but
 395          * this function is rarely called (ie., almost never).
 396          * Performance does not matter.
 397          */
 398         for_each_possible_blade(bid) {
 399                 if (!gru_base[bid])
 400                         break;
 401                 kgts = gru_base[bid]->bs_kgts;
 402                 if (!kgts || !kgts->ts_gru)
 403                         continue;
 404                 off = cb - kgts->ts_gru->gs_gru_base_vaddr;
 405                 if (off < GRU_SIZE)
 406                         break;
 407                 kgts = NULL;
 408         }
 409         BUG_ON(!kgts);
 410         cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
 411         cbe = get_cbe(GRUBASE(cb), cbrnum);
 412         gru_flush_cache(cbe);   /* CBE not coherent */
 413         sync_core();
 414         excdet->opc = cbe->opccpy;
 415         excdet->exopc = cbe->exopccpy;
 416         excdet->ecause = cbe->ecause;
 417         excdet->exceptdet0 = cbe->idef1upd;
 418         excdet->exceptdet1 = cbe->idef3upd;
 419         gru_flush_cache(cbe);
 420         return 0;
 421 }
 422
 423 static char *gru_get_cb_exception_detail_str(int ret, void *cb,
 424                                              char *buf, int size)
 425 {
 426         struct gru_control_block_status *gen = cb;
 427         struct control_block_extended_exc_detail excdet;
 428
 429         if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
 430                 gru_get_cb_exception_detail(cb, &excdet);
 431                 snprintf(buf, size,
 432                         "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
 433                         "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
 434                         gen, excdet.opc, excdet.exopc, excdet.ecause,
 435                         excdet.exceptdet0, excdet.exceptdet1);
 436         } else {
 437                 snprintf(buf, size, "No exception");
 438         }
 439         return buf;
 440 }
 441
 442 static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
 443 {
 444         while (gen->istatus >= CBS_ACTIVE) {
 445                 cpu_relax();
 446                 barrier();
 447         }
 448         return gen->istatus;
 449 }
 450
 451 static int gru_retry_exception(void *cb)
 452 {
 453         struct gru_control_block_status *gen = cb;
 454         struct control_block_extended_exc_detail excdet;
 455         int retry = EXCEPTION_RETRY_LIMIT;
 456
 457         while (1)  {
 458                 if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
 459                         return CBS_IDLE;
 460                 if (gru_get_cb_message_queue_substatus(cb))
 461                         return CBS_EXCEPTION;
 462                 gru_get_cb_exception_detail(cb, &excdet);
 463                 if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
 464                                 (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
 465                         break;
 466                 if (retry-- == 0)
 467                         break;
 468                 gen->icmd = 1;
 469                 gru_flush_cache(gen);
 470         }
 471         return CBS_EXCEPTION;
 472 }
 473
 474 int gru_check_status_proc(void *cb)
 475 {
 476         struct gru_control_block_status *gen = cb;
 477         int ret;
 478
 479         ret = gen->istatus;
 480         if (ret == CBS_EXCEPTION)
 481                 ret = gru_retry_exception(cb);
 482         rmb();
 483         return ret;
 484
 485 }
 486
 487 int gru_wait_proc(void *cb)
 488 {
 489         struct gru_control_block_status *gen = cb;
 490         int ret;
 491
 492         ret = gru_wait_idle_or_exception(gen);
 493         if (ret == CBS_EXCEPTION)
 494                 ret = gru_retry_exception(cb);
 495         rmb();
 496         return ret;
 497 }
 498
 499 static void gru_abort(int ret, void *cb, char *str)
 500 {
 501         char buf[GRU_EXC_STR_SIZE];
 502
 503         panic("GRU FATAL ERROR: %s - %s\n", str,
 504               gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
 505 }
 506
 507 void gru_wait_abort_proc(void *cb)
 508 {
 509         int ret;
 510
 511         ret = gru_wait_proc(cb);
 512         if (ret)
 513                 gru_abort(ret, cb, "gru_wait_abort");
 514 }
 515
 516
 517 /*------------------------------ MESSAGE QUEUES -----------------------------*/
 518
 519 /* Internal status . These are NOT returned to the user. */
 520 #define MQIE_AGAIN              -1      /* try again */
 521
 522
 523 /*
 524  * Save/restore the "present" flag that is in the second line of 2-line
 525  * messages
 526  */
 527 static inline int get_present2(void *p)
 528 {
 529         struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
 530         return mhdr->present;
 531 }
 532
 533 static inline void restore_present2(void *p, int val)
 534 {
 535         struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
 536         mhdr->present = val;
 537 }
 538
 539 /*
 540  * Create a message queue.
 541  *      qlines - message queue size in cache lines. Includes 2-line header.
 542  */
 543 int gru_create_message_queue(struct gru_message_queue_desc *mqd,
 544                 void *p, unsigned int bytes, int nasid, int vector, int apicid)
 545 {
 546         struct message_queue *mq = p;
 547         unsigned int qlines;
 548
 549         qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
 550         memset(mq, 0, bytes);
 551         mq->start = &mq->data;
 552         mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
 553         mq->next = &mq->data;
 554         mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
 555         mq->qlines = qlines;
 556         mq->hstatus[0] = 0;
 557         mq->hstatus[1] = 1;
 558         mq->head = gru_mesq_head(2, qlines / 2 + 1);
 559         mqd->mq = mq;
 560         mqd->mq_gpa = uv_gpa(mq);
 561         mqd->qlines = qlines;
 562         mqd->interrupt_pnode = nasid >> 1;
 563         mqd->interrupt_vector = vector;
 564         mqd->interrupt_apicid = apicid;
 565         return 0;
 566 }
 567 EXPORT_SYMBOL_GPL(gru_create_message_queue);
 568
 569 /*
 570  * Send a NOOP message to a message queue
 571  *      Returns:
 572  *               0 - if queue is full after the send. This is the normal case
 573  *                   but various races can change this.
 574  *              -1 - if mesq sent successfully but queue not full
 575  *              >0 - unexpected error. MQE_xxx returned
 576  */
 577 static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
 578                                 void *mesg)
 579 {
 580         const struct message_header noop_header = {
 581                                         .present = MQS_NOOP, .lines = 1};
 582         unsigned long m;
 583         int substatus, ret;
 584         struct message_header save_mhdr, *mhdr = mesg;
 585
 586         STAT(mesq_noop);
 587         save_mhdr = *mhdr;
 588         *mhdr = noop_header;
 589         gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
 590         ret = gru_wait(cb);
 591
 592         if (ret) {
 593                 substatus = gru_get_cb_message_queue_substatus(cb);
 594                 switch (substatus) {
 595                 case CBSS_NO_ERROR:
 596                         STAT(mesq_noop_unexpected_error);
 597                         ret = MQE_UNEXPECTED_CB_ERR;
 598                         break;
 599                 case CBSS_LB_OVERFLOWED:
 600                         STAT(mesq_noop_lb_overflow);
 601                         ret = MQE_CONGESTION;
 602                         break;
 603                 case CBSS_QLIMIT_REACHED:
 604                         STAT(mesq_noop_qlimit_reached);
 605                         ret = 0;
 606                         break;
 607                 case CBSS_AMO_NACKED:
 608                         STAT(mesq_noop_amo_nacked);
 609                         ret = MQE_CONGESTION;
 610                         break;
 611                 case CBSS_PUT_NACKED:
 612                         STAT(mesq_noop_put_nacked);
 613                         m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
 614                         gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
 615                                                 IMA);
 616                         if (gru_wait(cb) == CBS_IDLE)
 617                                 ret = MQIE_AGAIN;
 618                         else
 619                                 ret = MQE_UNEXPECTED_CB_ERR;
 620                         break;
 621                 case CBSS_PAGE_OVERFLOW:
 622                         STAT(mesq_noop_page_overflow);
 623                         fallthrough;
 624                 default:
 625                         BUG();
 626                 }
 627         }
 628         *mhdr = save_mhdr;
 629         return ret;
 630 }
 631
 632 /*
 633  * Handle a gru_mesq full.
 634  */
 635 static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
 636                                 void *mesg, int lines)
 637 {
 638         union gru_mesqhead mqh;
 639         unsigned int limit, head;
 640         unsigned long avalue;
 641         int half, qlines;
 642
 643         /* Determine if switching to first/second half of q */
 644         avalue = gru_get_amo_value(cb);
 645         head = gru_get_amo_value_head(cb);
 646         limit = gru_get_amo_value_limit(cb);
 647
 648         qlines = mqd->qlines;
 649         half = (limit != qlines);
 650
 651         if (half)
 652                 mqh = gru_mesq_head(qlines / 2 + 1, qlines);
 653         else
 654                 mqh = gru_mesq_head(2, qlines / 2 + 1);
 655
 656         /* Try to get lock for switching head pointer */
 657         gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
 658         if (gru_wait(cb) != CBS_IDLE)
 659                 goto cberr;
 660         if (!gru_get_amo_value(cb)) {
 661                 STAT(mesq_qf_locked);
 662                 return MQE_QUEUE_FULL;
 663         }
 664
 665         /* Got the lock. Send optional NOP if queue not full, */
 666         if (head != limit) {
 667                 if (send_noop_message(cb, mqd, mesg)) {
 668                         gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
 669                                         XTYPE_DW, IMA);
 670                         if (gru_wait(cb) != CBS_IDLE)
 671                                 goto cberr;
 672                         STAT(mesq_qf_noop_not_full);
 673                         return MQIE_AGAIN;
 674                 }
 675                 avalue++;
 676         }
 677
 678         /* Then flip queuehead to other half of queue. */
 679         gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
 680                                                         IMA);
 681         if (gru_wait(cb) != CBS_IDLE)
 682                 goto cberr;
 683
 684         /* If not successfully in swapping queue head, clear the hstatus lock */
 685         if (gru_get_amo_value(cb) != avalue) {
 686                 STAT(mesq_qf_switch_head_failed);
 687                 gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
 688                                                         IMA);
 689                 if (gru_wait(cb) != CBS_IDLE)
 690                         goto cberr;
 691         }
 692         return MQIE_AGAIN;
 693 cberr:
 694         STAT(mesq_qf_unexpected_error);
 695         return MQE_UNEXPECTED_CB_ERR;
 696 }
 697
 698 /*
 699  * Handle a PUT failure. Note: if message was a 2-line message, one of the
 700  * lines might have successfully have been written. Before sending the
 701  * message, "present" must be cleared in BOTH lines to prevent the receiver
 702  * from prematurely seeing the full message.
 703  */
 704 static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
 705                         void *mesg, int lines)
 706 {
 707         unsigned long m;
 708         int ret, loops = 200;   /* experimentally determined */
 709
 710         m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
 711         if (lines == 2) {
 712                 gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
 713                 if (gru_wait(cb) != CBS_IDLE)
 714                         return MQE_UNEXPECTED_CB_ERR;
 715         }
 716         gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
 717         if (gru_wait(cb) != CBS_IDLE)
 718                 return MQE_UNEXPECTED_CB_ERR;
 719
 720         if (!mqd->interrupt_vector)
 721                 return MQE_OK;
 722
 723         /*
 724          * Send a noop message in order to deliver a cross-partition interrupt
 725          * to the SSI that contains the target message queue. Normally, the
 726          * interrupt is automatically delivered by hardware following mesq
 727          * operations, but some error conditions require explicit delivery.
 728          * The noop message will trigger delivery. Otherwise partition failures
 729          * could cause unrecovered errors.
 730          */
 731         do {
 732                 ret = send_noop_message(cb, mqd, mesg);
 733         } while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0));
 734
 735         if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) {
 736                 /*
 737                  * Don't indicate to the app to resend the message, as it's
 738                  * already been successfully sent.  We simply send an OK
 739                  * (rather than fail the send with MQE_UNEXPECTED_CB_ERR),
 740                  * assuming that the other side is receiving enough
 741                  * interrupts to get this message processed anyway.
 742                  */
 743                 ret = MQE_OK;
 744         }
 745         return ret;
 746 }
 747
 748 /*
 749  * Handle a gru_mesq failure. Some of these failures are software recoverable
 750  * or retryable.
 751  */
 752 static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
 753                                 void *mesg, int lines)
 754 {
 755         int substatus, ret = 0;
 756
 757         substatus = gru_get_cb_message_queue_substatus(cb);
 758         switch (substatus) {
 759         case CBSS_NO_ERROR:
 760                 STAT(mesq_send_unexpected_error);
 761                 ret = MQE_UNEXPECTED_CB_ERR;
 762                 break;
 763         case CBSS_LB_OVERFLOWED:
 764                 STAT(mesq_send_lb_overflow);
 765                 ret = MQE_CONGESTION;
 766                 break;
 767         case CBSS_QLIMIT_REACHED:
 768                 STAT(mesq_send_qlimit_reached);
 769                 ret = send_message_queue_full(cb, mqd, mesg, lines);
 770                 break;
 771         case CBSS_AMO_NACKED:
 772                 STAT(mesq_send_amo_nacked);
 773                 ret = MQE_CONGESTION;
 774                 break;
 775         case CBSS_PUT_NACKED:
 776                 STAT(mesq_send_put_nacked);
 777                 ret = send_message_put_nacked(cb, mqd, mesg, lines);
 778                 break;
 779         case CBSS_PAGE_OVERFLOW:
 780                 STAT(mesq_page_overflow);
 781                 fallthrough;
 782         default:
 783                 BUG();
 784         }
 785         return ret;
 786 }
 787
 788 /*
 789  * Send a message to a message queue
 790  *      mqd     message queue descriptor
 791  *      mesg    message. ust be vaddr within a GSEG
 792  *      bytes   message size (<= 2 CL)
 793  */
 794 int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
 795                                 unsigned int bytes)
 796 {
 797         struct message_header *mhdr;
 798         void *cb;
 799         void *dsr;
 800         int istatus, clines, ret;
 801
 802         STAT(mesq_send);
 803         BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
 804
 805         clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
 806         if (gru_get_cpu_resources(bytes, &cb, &dsr))
 807                 return MQE_BUG_NO_RESOURCES;
 808         memcpy(dsr, mesg, bytes);
 809         mhdr = dsr;
 810         mhdr->present = MQS_FULL;
 811         mhdr->lines = clines;
 812         if (clines == 2) {
 813                 mhdr->present2 = get_present2(mhdr);
 814                 restore_present2(mhdr, MQS_FULL);
 815         }
 816
 817         do {
 818                 ret = MQE_OK;
 819                 gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
 820                 istatus = gru_wait(cb);
 821                 if (istatus != CBS_IDLE)
 822                         ret = send_message_failure(cb, mqd, dsr, clines);
 823         } while (ret == MQIE_AGAIN);
 824         gru_free_cpu_resources(cb, dsr);
 825
 826         if (ret)
 827                 STAT(mesq_send_failed);
 828         return ret;
 829 }
 830 EXPORT_SYMBOL_GPL(gru_send_message_gpa);
 831
 832 /*
 833  * Advance the receive pointer for the queue to the next message.
 834  */
 835 void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
 836 {
 837         struct message_queue *mq = mqd->mq;
 838         struct message_header *mhdr = mq->next;
 839         void *next, *pnext;
 840         int half = -1;
 841         int lines = mhdr->lines;
 842
 843         if (lines == 2)
 844                 restore_present2(mhdr, MQS_EMPTY);
 845         mhdr->present = MQS_EMPTY;
 846
 847         pnext = mq->next;
 848         next = pnext + GRU_CACHE_LINE_BYTES * lines;
 849         if (next == mq->limit) {
 850                 next = mq->start;
 851                 half = 1;
 852         } else if (pnext < mq->start2 && next >= mq->start2) {
 853                 half = 0;
 854         }
 855
 856         if (half >= 0)
 857                 mq->hstatus[half] = 1;
 858         mq->next = next;
 859 }
 860 EXPORT_SYMBOL_GPL(gru_free_message);
 861
 862 /*
 863  * Get next message from message queue. Return NULL if no message
 864  * present. User must call next_message() to move to next message.
 865  *      rmq     message queue
 866  */
 867 void *gru_get_next_message(struct gru_message_queue_desc *mqd)
 868 {
 869         struct message_queue *mq = mqd->mq;
 870         struct message_header *mhdr = mq->next;
 871         int present = mhdr->present;
 872
 873         /* skip NOOP messages */
 874         while (present == MQS_NOOP) {
 875                 gru_free_message(mqd, mhdr);
 876                 mhdr = mq->next;
 877                 present = mhdr->present;
 878         }
 879
 880         /* Wait for both halves of 2 line messages */
 881         if (present == MQS_FULL && mhdr->lines == 2 &&
 882                                 get_present2(mhdr) == MQS_EMPTY)
 883                 present = MQS_EMPTY;
 884
 885         if (!present) {
 886                 STAT(mesq_receive_none);
 887                 return NULL;
 888         }
 889
 890         if (mhdr->lines == 2)
 891                 restore_present2(mhdr, mhdr->present2);
 892
 893         STAT(mesq_receive);
 894         return mhdr;
 895 }
 896 EXPORT_SYMBOL_GPL(gru_get_next_message);
 897
 898 /* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
 899
 900 /*
 901  * Load a DW from a global GPA. The GPA can be a memory or MMR address.
 902  */
 903 int gru_read_gpa(unsigned long *value, unsigned long gpa)
 904 {
 905         void *cb;
 906         void *dsr;
 907         int ret, iaa;
 908
 909         STAT(read_gpa);
 910         if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
 911                 return MQE_BUG_NO_RESOURCES;
 912         iaa = gpa >> 62;
 913         gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
 914         ret = gru_wait(cb);
 915         if (ret == CBS_IDLE)
 916                 *value = *(unsigned long *)dsr;
 917         gru_free_cpu_resources(cb, dsr);
 918         return ret;
 919 }
 920 EXPORT_SYMBOL_GPL(gru_read_gpa);
 921
 922
 923 /*
 924  * Copy a block of data using the GRU resources
 925  */
 926 int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
 927                                 unsigned int bytes)
 928 {
 929         void *cb;
 930         void *dsr;
 931         int ret;
 932
 933         STAT(copy_gpa);
 934         if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
 935                 return MQE_BUG_NO_RESOURCES;
 936         gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
 937                   XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
 938         ret = gru_wait(cb);
 939         gru_free_cpu_resources(cb, dsr);
 940         return ret;
 941 }
 942 EXPORT_SYMBOL_GPL(gru_copy_gpa);
 943
 944 /* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
 945 /*      Temp - will delete after we gain confidence in the GRU          */
 946
 947 static int quicktest0(unsigned long arg)
 948 {
 949         unsigned long word0;
 950         unsigned long word1;
 951         void *cb;
 952         void *dsr;
 953         unsigned long *p;
 954         int ret = -EIO;
 955
 956         if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
 957                 return MQE_BUG_NO_RESOURCES;
 958         p = dsr;
 959         word0 = MAGIC;
 960         word1 = 0;
 961
 962         gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
 963         if (gru_wait(cb) != CBS_IDLE) {
 964                 printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
 965                 goto done;
 966         }
 967
 968         if (*p != MAGIC) {
 969                 printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
 970                 goto done;
 971         }
 972         gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
 973         if (gru_wait(cb) != CBS_IDLE) {
 974                 printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
 975                 goto done;
 976         }
 977
 978         if (word0 != word1 || word1 != MAGIC) {
 979                 printk(KERN_DEBUG
 980                        "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
 981                      smp_processor_id(), word1, MAGIC);
 982                 goto done;
 983         }
 984         ret = 0;
 985
 986 done:
 987         gru_free_cpu_resources(cb, dsr);
 988         return ret;
 989 }
 990
 991 #define ALIGNUP(p, q)   ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))
 992
 993 static int quicktest1(unsigned long arg)
 994 {
 995         struct gru_message_queue_desc mqd;
 996         void *p, *mq;
 997         int i, ret = -EIO;
 998         char mes[GRU_CACHE_LINE_BYTES], *m;
 999
1000         /* Need  1K cacheline aligned that does not cross page boundary */
1001         p = kmalloc(4096, 0);
1002         if (p == NULL)
1003                 return -ENOMEM;
1004         mq = ALIGNUP(p, 1024);
1005         memset(mes, 0xee, sizeof(mes));
1006
1007         gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
1008         for (i = 0; i < 6; i++) {
1009                 mes[8] = i;
1010                 do {
1011                         ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
1012                 } while (ret == MQE_CONGESTION);
1013                 if (ret)
1014                         break;
1015         }
1016         if (ret != MQE_QUEUE_FULL || i != 4) {
1017                 printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n",
1018                        smp_processor_id(), ret, i);
1019                 goto done;
1020         }
1021
1022         for (i = 0; i < 6; i++) {
1023                 m = gru_get_next_message(&mqd);
1024                 if (!m || m[8] != i)
1025                         break;
1026                 gru_free_message(&mqd, m);
1027         }
1028         if (i != 4) {
1029                 printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n",
1030                         smp_processor_id(), i, m, m ? m[8] : -1);
1031                 goto done;
1032         }
1033         ret = 0;
1034
1035 done:
1036         kfree(p);
1037         return ret;
1038 }
1039
1040 static int quicktest2(unsigned long arg)
1041 {
1042         static DECLARE_COMPLETION(cmp);
1043         unsigned long han;
1044         int blade_id = 0;
1045         int numcb = 4;
1046         int ret = 0;
1047         unsigned long *buf;
1048         void *cb0, *cb;
1049         struct gru_control_block_status *gen;
1050         int i, k, istatus, bytes;
1051
1052         bytes = numcb * 4 * 8;
1053         buf = kmalloc(bytes, GFP_KERNEL);
1054         if (!buf)
1055                 return -ENOMEM;
1056
1057         ret = -EBUSY;
1058         han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
1059         if (!han)
1060                 goto done;
1061
1062         gru_lock_async_resource(han, &cb0, NULL);
1063         memset(buf, 0xee, bytes);
1064         for (i = 0; i < numcb; i++)
1065                 gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
1066                                 XTYPE_DW, 4, 1, IMA_INTERRUPT);
1067
1068         ret = 0;
1069         k = numcb;
1070         do {
1071                 gru_wait_async_cbr(han);
1072                 for (i = 0; i < numcb; i++) {
1073                         cb = cb0 + i * GRU_HANDLE_STRIDE;
1074                         istatus = gru_check_status(cb);
1075                         if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
1076                                 break;
1077                 }
1078                 if (i == numcb)
1079                         continue;
1080                 if (istatus != CBS_IDLE) {
1081                         printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
1082                         ret = -EFAULT;
1083                 } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
1084                                 buf[4 * i + 3]) {
1085                         printk(KERN_DEBUG "GRU:%d quicktest2:cb %d,  buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
1086                                smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
1087                         ret = -EIO;
1088                 }
1089                 k--;
1090                 gen = cb;
1091                 gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
1092         } while (k);
1093         BUG_ON(cmp.done);
1094
1095         gru_unlock_async_resource(han);
1096         gru_release_async_resources(han);
1097 done:
1098         kfree(buf);
1099         return ret;
1100 }
1101
1102 #define BUFSIZE 200
1103 static int quicktest3(unsigned long arg)
1104 {
1105         char buf1[BUFSIZE], buf2[BUFSIZE];
1106         int ret = 0;
1107
1108         memset(buf2, 0, sizeof(buf2));
1109         memset(buf1, get_cycles() & 255, sizeof(buf1));
1110         gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
1111         if (memcmp(buf1, buf2, BUFSIZE)) {
1112                 printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
1113                 ret = -EIO;
1114         }
1115         return ret;
1116 }
1117
/*
 * Debugging only. User hook for various kernel tests
 * of driver & gru.
 *
 * The low byte of "arg" selects the action: 0-3 run the matching
 * quicktestN(), 99 calls gru_free_kernel_contexts(). Returns the result of
 * the selected action, or -EINVAL for any other selector.
 */
int gru_ktest(unsigned long arg)
{
	switch (arg & 0xff) {
	case 0:
		return quicktest0(arg);
	case 1:
		return quicktest1(arg);
	case 2:
		return quicktest2(arg);
	case 3:
		return quicktest3(arg);
	case 99:
		return gru_free_kernel_contexts();
	default:
		return -EINVAL;
	}
}
1146
/*
 * Initialize the kernel services. There is nothing to set up here;
 * always returns 0.
 */
int gru_kservices_init(void)
{
	return 0;
}
1151
/*
 * Tear down the kernel services: frees the kernel contexts and treats a
 * non-zero result from gru_free_kernel_contexts() as fatal (BUG).
 */
void gru_kservices_exit(void)
{
	int err = gru_free_kernel_contexts();

	if (err)
		BUG();
}
1157