// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#define pr_fmt(fmt)                     "habanalabs: " fmt

#include "habanalabs.h"

#include <linux/pci.h>
#include <linux/sched/signal.h>
#include <linux/hwmon.h>
#include <uapi/misc/habanalabs.h>

#define HL_PLDM_PENDING_RESET_PER_SEC   (HL_PENDING_RESET_PER_SEC * 10)
bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
{
        if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
                return true;
        else
                return false;
}
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
        enum hl_device_status status;

        if (hdev->disabled)
                status = HL_DEVICE_STATUS_MALFUNCTION;
        else if (atomic_read(&hdev->in_reset))
                status = HL_DEVICE_STATUS_IN_RESET;
        else
                status = HL_DEVICE_STATUS_OPERATIONAL;

        return status;
}
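
/*
 * Usage sketch (hypothetical caller, not part of this file): an IOCTL
 * handler can use the two helpers above to bail out early and to report
 * the current state to user-space, e.g.:
 *
 *      if (hl_device_disabled_or_in_reset(hdev))
 *              return -EBUSY;
 *
 *      hw_ip_info.status = hl_device_status(hdev);
 */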
static void hpriv_release(struct kref *ref)
{
        struct hl_fpriv *hpriv;
        struct hl_device *hdev;

        hpriv = container_of(ref, struct hl_fpriv, refcount);

        hdev = hpriv->hdev;

        put_pid(hpriv->taskpid);

        hl_debugfs_remove_file(hpriv);

        mutex_destroy(&hpriv->restore_phase_mutex);

        kfree(hpriv);

        /* Now the FD is really closed */
        atomic_dec(&hdev->fd_open_cnt);

        /* This allows a new user context to open the device */
        hdev->user_ctx = NULL;
}
void hl_hpriv_get(struct hl_fpriv *hpriv)
{
        kref_get(&hpriv->refcount);
}

void hl_hpriv_put(struct hl_fpriv *hpriv)
{
        kref_put(&hpriv->refcount, hpriv_release);
}
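
/*
 * Refcounting sketch (hypothetical caller): code that keeps using the per-FD
 * private data beyond the file descriptor's lifetime is expected to hold a
 * reference around that use, e.g.:
 *
 *      hl_hpriv_get(hpriv);
 *      ... asynchronous work that may complete after close() ...
 *      hl_hpriv_put(hpriv);    // last put invokes hpriv_release()
 */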
/*
 * hl_device_release - release function for habanalabs device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when a process closes a habanalabs device
 */
static int hl_device_release(struct inode *inode, struct file *filp)
{
        struct hl_fpriv *hpriv = filp->private_data;

        hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
        hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);

        filp->private_data = NULL;

        hl_hpriv_put(hpriv);

        return 0;
}
/*
 * hl_mmap - mmap function for habanalabs device
 *
 * @filp: pointer to file structure
 * @vma: pointer to vm_area_struct of the process
 *
 * Called when a process does an mmap on a habanalabs device. Call the
 * device's mmap function at the end of the common code.
 */
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct hl_fpriv *hpriv = filp->private_data;

        if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
                vma->vm_pgoff ^= HL_MMAP_CB_MASK;
                return hl_cb_mmap(hpriv, vma);
        }

        return -EINVAL;
}
static const struct file_operations hl_ops = {
        .owner = THIS_MODULE,
        .open = hl_device_open,
        .release = hl_device_release,
        .mmap = hl_mmap,
        .unlocked_ioctl = hl_ioctl,
        .compat_ioctl = hl_ioctl
};
/*
 * device_setup_cdev - setup cdev and device for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 * @minor: minor number of the specific device
 * @fops: file operations to install for this device
 *
 * Create a cdev and a Linux device for the habanalabs device. Needs to be
 * called at the end of the habanalabs device initialization process,
 * because this function exposes the device to the user
 */
static int device_setup_cdev(struct hl_device *hdev, struct class *hclass,
                                int minor, const struct file_operations *fops)
{
        int err, devno = MKDEV(hdev->major, minor);
        struct cdev *hdev_cdev = &hdev->cdev;
        char *name;

        name = kasprintf(GFP_KERNEL, "hl%d", hdev->id);
        if (!name)
                return -ENOMEM;

        cdev_init(hdev_cdev, fops);
        hdev_cdev->owner = THIS_MODULE;
        err = cdev_add(hdev_cdev, devno, 1);
        if (err) {
                pr_err("Failed to add char device %s\n", name);
                goto err_cdev_add;
        }

        hdev->dev = device_create(hclass, NULL, devno, NULL, "%s", name);
        if (IS_ERR(hdev->dev)) {
                pr_err("Failed to create device %s\n", name);
                err = PTR_ERR(hdev->dev);
                goto err_device_create;
        }

        dev_set_drvdata(hdev->dev, hdev);

        kfree(name);

        return 0;

err_device_create:
        cdev_del(hdev_cdev);
err_cdev_add:
        kfree(name);
        return err;
}
/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
        int rc;

        switch (hdev->asic_type) {
        case ASIC_GOYA:
                goya_set_asic_funcs(hdev);
                strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
                break;
        default:
                dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
                        hdev->asic_type);
                return -EINVAL;
        }

        rc = hdev->asic_funcs->early_init(hdev);
        if (rc)
                return rc;

        rc = hl_asid_init(hdev);
        if (rc)
                goto early_fini;

        hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
        if (hdev->cq_wq == NULL) {
                dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
                rc = -ENOMEM;
                goto asid_fini;
        }

        hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
        if (hdev->eq_wq == NULL) {
                dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
                rc = -ENOMEM;
                goto free_cq_wq;
        }

        hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
                                        GFP_KERNEL);
        if (!hdev->hl_chip_info) {
                rc = -ENOMEM;
                goto free_eq_wq;
        }

        hl_cb_mgr_init(&hdev->kernel_cb_mgr);

        mutex_init(&hdev->fd_open_cnt_lock);
        mutex_init(&hdev->send_cpu_message_lock);
        mutex_init(&hdev->mmu_cache_lock);
        INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
        spin_lock_init(&hdev->hw_queues_mirror_lock);
        atomic_set(&hdev->in_reset, 0);
        atomic_set(&hdev->fd_open_cnt, 0);
        atomic_set(&hdev->cs_active_cnt, 0);

        return 0;

free_eq_wq:
        destroy_workqueue(hdev->eq_wq);
free_cq_wq:
        destroy_workqueue(hdev->cq_wq);
asid_fini:
        hl_asid_fini(hdev);
early_fini:
        if (hdev->asic_funcs->early_fini)
                hdev->asic_funcs->early_fini(hdev);

        return rc;
}
/*
 * device_early_fini - finalize all that was done in device_early_init
 *
 * @hdev: pointer to habanalabs device structure
 */
static void device_early_fini(struct hl_device *hdev)
{
        mutex_destroy(&hdev->mmu_cache_lock);
        mutex_destroy(&hdev->send_cpu_message_lock);

        hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);

        kfree(hdev->hl_chip_info);

        destroy_workqueue(hdev->eq_wq);
        destroy_workqueue(hdev->cq_wq);

        hl_asid_fini(hdev);

        if (hdev->asic_funcs->early_fini)
                hdev->asic_funcs->early_fini(hdev);

        mutex_destroy(&hdev->fd_open_cnt_lock);
}
static void set_freq_to_low_job(struct work_struct *work)
{
        struct hl_device *hdev = container_of(work, struct hl_device,
                                                work_freq.work);

        if (atomic_read(&hdev->fd_open_cnt) == 0)
                hl_device_set_frequency(hdev, PLL_LOW);

        schedule_delayed_work(&hdev->work_freq,
                        usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}
static void hl_device_heartbeat(struct work_struct *work)
{
        struct hl_device *hdev = container_of(work, struct hl_device,
                                                work_heartbeat.work);

        if (hl_device_disabled_or_in_reset(hdev))
                goto reschedule;

        if (!hdev->asic_funcs->send_heartbeat(hdev))
                goto reschedule;

        dev_err(hdev->dev, "Device heartbeat failed!\n");
        hl_device_reset(hdev, true, false);

        return;

reschedule:
        schedule_delayed_work(&hdev->work_heartbeat,
                        usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}
/*
 * device_late_init - do late initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Do things that either need the device H/W queues to be active or need
 * to happen after all the rest of the initialization is finished
 */
static int device_late_init(struct hl_device *hdev)
{
        int rc;

        INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
        hdev->high_pll = hdev->asic_prop.high_pll;

        /* force setting to low frequency */
        atomic_set(&hdev->curr_pll_profile, PLL_LOW);

        if (hdev->pm_mng_profile == PM_AUTO)
                hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
        else
                hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);

        if (hdev->asic_funcs->late_init) {
                rc = hdev->asic_funcs->late_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "failed late initialization for the H/W\n");
                        return rc;
                }
        }

        schedule_delayed_work(&hdev->work_freq,
                        usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

        if (hdev->heartbeat) {
                INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
                schedule_delayed_work(&hdev->work_heartbeat,
                                usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
        }

        hdev->late_init_done = true;

        return 0;
}
/*
 * device_late_fini - finalize all that was done in device_late_init
 *
 * @hdev: pointer to habanalabs device structure
 */
static void device_late_fini(struct hl_device *hdev)
{
        if (!hdev->late_init_done)
                return;

        cancel_delayed_work_sync(&hdev->work_freq);
        if (hdev->heartbeat)
                cancel_delayed_work_sync(&hdev->work_heartbeat);

        if (hdev->asic_funcs->late_fini)
                hdev->asic_funcs->late_fini(hdev);

        hdev->late_init_done = false;
}
/*
 * hl_device_set_frequency - set the frequency of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @freq: the new frequency value
 *
 * Change the frequency if needed. We allow setting the PLL to low only if
 * there is no open user process.
 * Returns 0 if no change was done, otherwise returns 1.
 */
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
{
        enum hl_pll_frequency old_freq =
                        (freq == PLL_HIGH) ? PLL_LOW : PLL_HIGH;
        int ret;

        if (hdev->pm_mng_profile == PM_MANUAL)
                return 0;

        ret = atomic_cmpxchg(&hdev->curr_pll_profile, old_freq, freq);
        if (ret == freq)
                return 0;

        /*
         * In case we want to lower the frequency, check that the device is
         * not open. We must have this check here to work around a race
         * condition with hl_device_open
         */
        if ((freq == PLL_LOW) && (atomic_read(&hdev->fd_open_cnt) > 0)) {
                atomic_set(&hdev->curr_pll_profile, PLL_HIGH);
                return 0;
        }

        dev_dbg(hdev->dev, "Changing device frequency to %s\n",
                freq == PLL_HIGH ? "high" : "low");

        hdev->asic_funcs->set_pll_profile(hdev, freq);

        return 1;
}
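
/*
 * Usage sketch (the open-path call site is an assumption, not shown in this
 * file): the idle work above drops to the low profile, while the open path
 * would typically request the high one:
 *
 *      hl_device_set_frequency(hdev, PLL_LOW);   // from set_freq_to_low_job
 *      hl_device_set_frequency(hdev, PLL_HIGH);  // e.g. on device open
 *
 * The return value only reports whether a change was actually applied.
 */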
/*
 * hl_device_suspend - initiate device suspend
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Puts the hw in the suspend state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 */
int hl_device_suspend(struct hl_device *hdev)
{
        int rc;

        pci_save_state(hdev->pdev);

        /* Block future CS/VM/JOB completion operations */
        rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
        if (rc) {
                dev_err(hdev->dev, "Can't suspend while in reset\n");
                return -EIO;
        }

        /* This blocks all other stuff that is not blocked by in_reset */
        hdev->disabled = true;

        /*
         * Flush anyone that is inside the critical section of enqueue
         * jobs to the H/W
         */
        hdev->asic_funcs->hw_queues_lock(hdev);
        hdev->asic_funcs->hw_queues_unlock(hdev);

        /* Flush processes that are sending message to CPU */
        mutex_lock(&hdev->send_cpu_message_lock);
        mutex_unlock(&hdev->send_cpu_message_lock);

        rc = hdev->asic_funcs->suspend(hdev);
        if (rc)
                dev_err(hdev->dev,
                        "Failed to disable PCI access of device CPU\n");

        /* Shut down the device */
        pci_disable_device(hdev->pdev);
        pci_set_power_state(hdev->pdev, PCI_D3hot);

        return 0;
}
/*
 * hl_device_resume - initiate device resume
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Bring the hw back to operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
int hl_device_resume(struct hl_device *hdev)
{
        int rc;

        pci_set_power_state(hdev->pdev, PCI_D0);
        pci_restore_state(hdev->pdev);
        rc = pci_enable_device_mem(hdev->pdev);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to enable PCI device in resume\n");
                return rc;
        }

        pci_set_master(hdev->pdev);

        rc = hdev->asic_funcs->resume(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to resume device after suspend\n");
                goto disable_device;
        }

        hdev->disabled = false;
        atomic_set(&hdev->in_reset, 0);

        rc = hl_device_reset(hdev, true, false);
        if (rc) {
                dev_err(hdev->dev, "Failed to reset device during resume\n");
                goto disable_device;
        }

        return 0;

disable_device:
        pci_clear_master(hdev->pdev);
        pci_disable_device(hdev->pdev);

        return rc;
}
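
/*
 * Sketch of how the PCI driver might hook these into its power-management
 * callbacks (illustrative only; the real wiring lives in the bus driver
 * code and the callback names below are assumptions):
 *
 *      static int hl_pmops_suspend(struct device *dev)
 *      {
 *              struct hl_device *hdev = dev_get_drvdata(dev);
 *
 *              return hl_device_suspend(hdev);
 *      }
 *
 *      static SIMPLE_DEV_PM_OPS(hl_pm_ops, hl_pmops_suspend, hl_pmops_resume);
 */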
static void device_kill_open_processes(struct hl_device *hdev)
{
        u16 pending_total, pending_cnt;
        struct task_struct *task = NULL;

        if (hdev->pldm)
                pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
        else
                pending_total = HL_PENDING_RESET_PER_SEC;

        pending_cnt = pending_total;

        /* Flush all processes that are inside hl_open */
        mutex_lock(&hdev->fd_open_cnt_lock);

        while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) {

                pending_cnt--;

                dev_info(hdev->dev,
                        "Can't HARD reset, waiting for user to close FD\n");
                ssleep(1);
        }

        if (atomic_read(&hdev->fd_open_cnt)) {
                task = get_pid_task(hdev->user_ctx->hpriv->taskpid,
                                        PIDTYPE_PID);
                if (task) {
                        dev_info(hdev->dev, "Killing user processes\n");
                        send_sig(SIGKILL, task, 1);
                        msleep(100);

                        put_task_struct(task);
                }
        }

        /* We killed the open users, but because the driver cleans up after the
         * user contexts are closed (e.g. mmu mappings), we need to wait again
         * to make sure the cleaning phase is finished before continuing with
         * the reset
         */

        pending_cnt = pending_total;

        while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) {

                pending_cnt--;

                ssleep(1);
        }

        if (atomic_read(&hdev->fd_open_cnt))
                dev_crit(hdev->dev,
                        "Going to hard reset with open user contexts\n");

        mutex_unlock(&hdev->fd_open_cnt_lock);
}
static void device_hard_reset_pending(struct work_struct *work)
{
        struct hl_device_reset_work *device_reset_work =
                container_of(work, struct hl_device_reset_work, reset_work);
        struct hl_device *hdev = device_reset_work->hdev;

        device_kill_open_processes(hdev);

        hl_device_reset(hdev, true, true);

        kfree(device_reset_work);
}
/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @hard_reset: should we do hard reset to all engines or just reset the
 *              compute/dma engines
 * @from_hard_reset_thread: is the caller the dedicated hard reset work
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * Returns 0 for success or an error on failure.
 */
int hl_device_reset(struct hl_device *hdev, bool hard_reset,
                        bool from_hard_reset_thread)
{
        int i, rc;

        if (!hdev->init_done) {
                dev_err(hdev->dev,
                        "Can't reset before initialization is done\n");
                return 0;
        }
        /*
         * Prevent concurrency in this function - only one reset should be
         * done at any given time. Only need to perform this if we didn't
         * get here from the dedicated hard reset thread
         */
        if (!from_hard_reset_thread) {
                /* Block future CS/VM/JOB completion operations */
                rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
                if (rc)
                        return 0;

                /* This also blocks future CS/VM/JOB completion operations */
                hdev->disabled = true;

                /*
                 * Flush anyone that is inside the critical section of enqueue
                 * jobs to the H/W
                 */
                hdev->asic_funcs->hw_queues_lock(hdev);
                hdev->asic_funcs->hw_queues_unlock(hdev);

                dev_err(hdev->dev, "Going to RESET device!\n");
        }

again:
        if ((hard_reset) && (!from_hard_reset_thread)) {
                struct hl_device_reset_work *device_reset_work;

                hdev->hard_reset_pending = true;

                if (!hdev->pdev) {
                        dev_err(hdev->dev,
                                "Reset action is NOT supported in simulator\n");
                        rc = -EINVAL;
                        goto out_err;
                }

                device_reset_work = kzalloc(sizeof(*device_reset_work),
                                                GFP_ATOMIC);
                if (!device_reset_work) {
                        rc = -ENOMEM;
                        goto out_err;
                }

                /*
                 * Because the reset function can't run from interrupt or
                 * from heartbeat work, we need to call the reset function
                 * from a dedicated work
                 */
                INIT_WORK(&device_reset_work->reset_work,
                                device_hard_reset_pending);
                device_reset_work->hdev = hdev;
                schedule_work(&device_reset_work->reset_work);

                return 0;
        }
        if (hard_reset) {
                device_late_fini(hdev);

                /*
                 * Now that the heartbeat thread is closed, flush processes
                 * which are sending messages to CPU
                 */
                mutex_lock(&hdev->send_cpu_message_lock);
                mutex_unlock(&hdev->send_cpu_message_lock);
        }

        /*
         * Halt the engines and disable interrupts so we won't get any more
         * completions from H/W and we won't have any accesses from the
         * H/W to the host machine
         */
        hdev->asic_funcs->halt_engines(hdev, hard_reset);

        /* Go over all the queues, release all CS and their jobs */
        hl_cs_rollback_all(hdev);

        /* Release kernel context */
        if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
                hdev->kernel_ctx = NULL;

        /* Reset the H/W. It will be in idle state after this returns */
        hdev->asic_funcs->hw_fini(hdev, hard_reset);

        if (hard_reset) {
                hl_vm_fini(hdev);
                hl_eq_reset(hdev, &hdev->event_queue);
        }

        /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
        hl_hw_queue_reset(hdev, hard_reset);
        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                hl_cq_reset(hdev, &hdev->completion_queue[i]);

        /* Make sure the context switch phase will run again */
        if (hdev->user_ctx) {
                atomic_set(&hdev->user_ctx->thread_ctx_switch_token, 1);
                hdev->user_ctx->thread_ctx_switch_wait_token = 0;
        }
        /* Finished tear-down, starting to re-initialize */

        if (hard_reset) {
                hdev->device_cpu_disabled = false;
                hdev->hard_reset_pending = false;

                if (hdev->kernel_ctx) {
                        dev_crit(hdev->dev,
                                "kernel ctx was alive during hard reset, something is terribly wrong\n");
                        rc = -EBUSY;
                        goto out_err;
                }

                /* Allocate the kernel context */
                hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
                                                GFP_KERNEL);
                if (!hdev->kernel_ctx) {
                        rc = -ENOMEM;
                        goto out_err;
                }

                hdev->user_ctx = NULL;

                rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
                if (rc) {
                        dev_err(hdev->dev,
                                "failed to init kernel ctx in hard reset\n");
                        kfree(hdev->kernel_ctx);
                        hdev->kernel_ctx = NULL;
                        goto out_err;
                }
        }

        rc = hdev->asic_funcs->hw_init(hdev);
        if (rc) {
                dev_err(hdev->dev,
                        "failed to initialize the H/W after reset\n");
                goto out_err;
        }

        hdev->disabled = false;

        /* Check that the communication with the device is working */
        rc = hdev->asic_funcs->test_queues(hdev);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to detect if device is alive after reset\n");
                goto out_err;
        }

        if (hard_reset) {
                rc = device_late_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed late init after hard reset\n");
                        goto out_err;
                }

                rc = hl_vm_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed to init memory module after hard reset\n");
                        goto out_err;
                }

                hl_set_max_power(hdev, hdev->max_power);
        } else {
                rc = hdev->asic_funcs->soft_reset_late_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed late init after soft reset\n");
                        goto out_err;
                }
        }

        atomic_set(&hdev->in_reset, 0);

        if (hard_reset)
                hdev->hard_reset_cnt++;
        else
                hdev->soft_reset_cnt++;

        return 0;
out_err:
        hdev->disabled = true;

        if (hard_reset) {
                dev_err(hdev->dev,
                        "Failed to reset! Device is NOT usable\n");
                hdev->hard_reset_cnt++;
        } else {
                dev_err(hdev->dev,
                        "Failed to do soft-reset, trying hard reset\n");
                hdev->soft_reset_cnt++;
                hard_reset = true;
                goto again;
        }

        atomic_set(&hdev->in_reset, 0);

        return rc;
}
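
/*
 * Usage sketch (hypothetical callers): a recoverable engine error would
 * normally request a soft reset, while a fatal event or a failed heartbeat
 * (see hl_device_heartbeat above) escalates to a hard reset:
 *
 *      hl_device_reset(hdev, false, false);    // soft reset, compute/dma only
 *      hl_device_reset(hdev, true, false);     // hard reset, schedules the work
 *
 * The hard-reset work itself re-enters with from_hard_reset_thread = true.
 */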
/*
 * hl_device_init - main initialization function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 *
 * Allocate an id for the device, do early initialization and then call the
 * ASIC specific initialization functions. Finally, create the cdev and the
 * Linux device to expose it to the user
 */
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
        int i, rc, cq_ready_cnt;

        /* Create device */
        rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops);

        if (rc)
                goto out_disabled;

        /* Initialize ASIC function pointers and perform early init */
        rc = device_early_init(hdev);
        if (rc)
                goto release_device;

        /*
         * Start calling ASIC initialization. First S/W then H/W and finally
         * late init
         */
        rc = hdev->asic_funcs->sw_init(hdev);
        if (rc)
                goto early_fini;
        /*
         * Initialize the H/W queues. Must be done before hw_init, because
         * there the addresses of the kernel queue are being written to the
         * registers of the device
         */
        rc = hl_hw_queues_create(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize kernel queues\n");
                goto sw_fini;
        }

        /*
         * Initialize the completion queues. Must be done before hw_init,
         * because there the addresses of the completion queues are being
         * passed as arguments to request_irq
         */
        hdev->completion_queue =
                        kcalloc(hdev->asic_prop.completion_queues_count,
                                sizeof(*hdev->completion_queue), GFP_KERNEL);

        if (!hdev->completion_queue) {
                dev_err(hdev->dev, "failed to allocate completion queues\n");
                rc = -ENOMEM;
                goto hw_queues_destroy;
        }

        for (i = 0, cq_ready_cnt = 0;
                        i < hdev->asic_prop.completion_queues_count;
                        i++, cq_ready_cnt++) {
                rc = hl_cq_init(hdev, &hdev->completion_queue[i], i);
                if (rc) {
                        dev_err(hdev->dev,
                                "failed to initialize completion queue\n");
                        goto cq_fini;
                }
        }

        /*
         * Initialize the event queue. Must be done before hw_init,
         * because there the address of the event queue is being
         * passed as argument to request_irq
         */
        rc = hl_eq_init(hdev, &hdev->event_queue);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize event queue\n");
                goto cq_fini;
        }
        /* Allocate the kernel context */
        hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
        if (!hdev->kernel_ctx) {
                rc = -ENOMEM;
                goto eq_fini;
        }

        hdev->user_ctx = NULL;

        rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize kernel context\n");
                goto free_ctx;
        }

        rc = hl_cb_pool_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize CB pool\n");
                goto release_ctx;
        }

        rc = hl_sysfs_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize sysfs\n");
                goto free_cb_pool;
        }

        hl_debugfs_add_device(hdev);

        if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
                dev_info(hdev->dev,
                        "H/W state is dirty, must reset before initializing\n");
                hdev->asic_funcs->hw_fini(hdev, true);
        }
        rc = hdev->asic_funcs->hw_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize the H/W\n");
                rc = 0;
                goto out_disabled;
        }

        hdev->disabled = false;

        /* Check that the communication with the device is working */
        rc = hdev->asic_funcs->test_queues(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to detect if device is alive\n");
                rc = 0;
                goto out_disabled;
        }

        /* After test_queues, KMD can start sending messages to device CPU */

        rc = device_late_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed late initialization\n");
                rc = 0;
                goto out_disabled;
        }

        dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
                hdev->asic_name,
                hdev->asic_prop.dram_size / 1024 / 1024 / 1024);

        rc = hl_vm_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to initialize memory module\n");
                rc = 0;
                goto out_disabled;
        }

        /*
         * hl_hwmon_init must be called after device_late_init, because only
         * there we get the information from the device about which
         * hwmon-related sensors the device supports
         */
        rc = hl_hwmon_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to initialize hwmon\n");
                rc = 0;
                goto out_disabled;
        }

        dev_notice(hdev->dev,
                "Successfully added device to habanalabs driver\n");

        hdev->init_done = true;

        return 0;
free_cb_pool:
        hl_cb_pool_fini(hdev);
release_ctx:
        if (hl_ctx_put(hdev->kernel_ctx) != 1)
                dev_err(hdev->dev,
                        "kernel ctx is still alive on initialization failure\n");
free_ctx:
        kfree(hdev->kernel_ctx);
eq_fini:
        hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
        for (i = 0 ; i < cq_ready_cnt ; i++)
                hl_cq_fini(hdev, &hdev->completion_queue[i]);
        kfree(hdev->completion_queue);
hw_queues_destroy:
        hl_hw_queues_destroy(hdev);
sw_fini:
        hdev->asic_funcs->sw_fini(hdev);
early_fini:
        device_early_fini(hdev);
release_device:
        device_destroy(hclass, hdev->dev->devt);
        cdev_del(&hdev->cdev);
out_disabled:
        hdev->disabled = true;
        if (hdev->pdev)
                dev_err(&hdev->pdev->dev,
                        "Failed to initialize hl%d. Device is NOT usable !\n",
                        hdev->id);
        else
                pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
                        hdev->id);

        return rc;
}
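
/*
 * Probe-path sketch (hypothetical; the real probe/remove code lives in the
 * bus driver, not in this file):
 *
 *      rc = hl_device_init(hdev, hclass);
 *      if (rc) {
 *              pr_err("habanalabs: device init failed\n");
 *              return rc;
 *      }
 *      ...
 *      hl_device_fini(hdev);   // on remove
 */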
/*
 * hl_device_fini - main tear-down function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Destroy the device, call ASIC fini functions and release the id
 */
void hl_device_fini(struct hl_device *hdev)
{
        int i, rc;
        ktime_t timeout;

        dev_info(hdev->dev, "Removing device\n");

        /*
         * This function is competing with the reset function, so try to
         * take the reset atomic and if we are already in the middle of reset,
         * wait until the reset function is finished. The reset function is
         * designed to always finish (could take up to a few seconds in the
         * worst case).
         */

        timeout = ktime_add_us(ktime_get(),
                                HL_PENDING_RESET_PER_SEC * 1000 * 1000 * 4);
        rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
        while (rc) {
                usleep_range(50, 200);
                rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
                if (ktime_compare(ktime_get(), timeout) > 0) {
                        WARN(1, "Failed to remove device because reset function did not finish\n");
                        return;
                }
        }

        /* Mark device as disabled */
        hdev->disabled = true;

        /*
         * Flush anyone that is inside the critical section of enqueue
         * jobs to the H/W
         */
        hdev->asic_funcs->hw_queues_lock(hdev);
        hdev->asic_funcs->hw_queues_unlock(hdev);

        hdev->hard_reset_pending = true;

        device_kill_open_processes(hdev);
        hl_hwmon_fini(hdev);

        device_late_fini(hdev);

        hl_debugfs_remove_device(hdev);

        hl_sysfs_fini(hdev);

        /*
         * Halt the engines and disable interrupts so we won't get any more
         * completions from H/W and we won't have any accesses from the
         * H/W to the host machine
         */
        hdev->asic_funcs->halt_engines(hdev, true);

        /* Go over all the queues, release all CS and their jobs */
        hl_cs_rollback_all(hdev);

        hl_cb_pool_fini(hdev);

        /* Release kernel context */
        if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
                dev_err(hdev->dev, "kernel ctx is still alive\n");

        /* Reset the H/W. It will be in idle state after this returns */
        hdev->asic_funcs->hw_fini(hdev, true);

        hl_vm_fini(hdev);

        hl_eq_fini(hdev, &hdev->event_queue);

        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                hl_cq_fini(hdev, &hdev->completion_queue[i]);
        kfree(hdev->completion_queue);

        hl_hw_queues_destroy(hdev);

        /* Call ASIC S/W finalize function */
        hdev->asic_funcs->sw_fini(hdev);

        device_early_fini(hdev);

        /* Hide device from user */
        device_destroy(hdev->dev->class, hdev->dev->devt);
        cdev_del(&hdev->cdev);

        pr_info("removed device successfully\n");
}
/*
 * hl_poll_timeout_memory - Periodically poll a host memory address
 *                          until it is not zero or a timeout occurs
 * @hdev: pointer to habanalabs device structure
 * @addr: Address to poll
 * @timeout_us: timeout in us
 * @val: Variable to read the value into
 *
 * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
 * case, the last read value at @addr is stored in @val. Must not
 * be called from atomic context, as the function sleeps between polls.
 *
 * The function sleeps up to 100us between polls, with an overall timeout
 * of timeout_us.
 */
int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr,
                                u32 timeout_us, u32 *val)
{
        /*
         * address in this function always points to a memory location in the
         * host's (server's) memory. That location is updated asynchronously,
         * either by direct access of the device or by another core
         */
        u32 *paddr = (u32 *) (uintptr_t) addr;
        ktime_t timeout;

        /* timeout should be longer when working with simulator */
        if (!hdev->pdev)
                timeout_us *= 10;

        timeout = ktime_add_us(ktime_get(), timeout_us);

        might_sleep();

        for (;;) {
                /*
                 * Flush CPU read/write buffers to make sure we read updates
                 * done by other cores or by the device
                 */
                mb();
                *val = *paddr;
                if (*val)
                        break;
                if (ktime_compare(ktime_get(), timeout) > 0) {
                        *val = *paddr;
                        break;
                }
                usleep_range((100 >> 2) + 1, 100);
        }

        return *val ? 0 : -ETIMEDOUT;
}
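
/*
 * Usage sketch (hypothetical caller and buffer): wait up to one second for
 * the device to write a non-zero completion value into a host buffer:
 *
 *      u32 result;
 *      int rc = hl_poll_timeout_memory(hdev,
 *                      (u64) (uintptr_t) completion_va, 1000000, &result);
 *      if (rc == -ETIMEDOUT)
 *              dev_err(hdev->dev, "device did not signal completion\n");
 */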
/*
 * hl_poll_timeout_device_memory - Periodically poll a device memory address
 *                                 until it is not zero or a timeout occurs
 * @hdev: pointer to habanalabs device structure
 * @addr: Device address to poll
 * @timeout_us: timeout in us
 * @val: Variable to read the value into
 *
 * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
 * case, the last read value at @addr is stored in @val. Must not
 * be called from atomic context, as the function sleeps between polls.
 *
 * The function sleeps up to 100us between polls, with an overall timeout
 * of timeout_us.
 */
int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
                                u32 timeout_us, u32 *val)
{
        ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);

        might_sleep();

        for (;;) {
                *val = readl(addr);
                if (*val)
                        break;
                if (ktime_compare(ktime_get(), timeout) > 0) {
                        *val = readl(addr);
                        break;
                }
                usleep_range((100 >> 2) + 1, 100);
        }

        return *val ? 0 : -ETIMEDOUT;
}
/*
 * MMIO register access helper functions.
 */

/*
 * hl_rreg - Read an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 *
 * Returns the value of the MMIO register we are asked to read
 */
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
{
        return readl(hdev->rmmio + reg);
}

/*
 * hl_wreg - Write to an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 * @val: 32-bit value
 *
 * Writes the 32-bit value into the MMIO register
 */
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
        writel(val, hdev->rmmio + reg);
}
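
/*
 * Usage sketch (the register offset is illustrative only): a read-modify-write
 * through these helpers looks like
 *
 *      u32 ctl = hl_rreg(hdev, 0x1000);
 *      hl_wreg(hdev, 0x1000, ctl | 0x1);
 *
 * ASIC-specific code normally goes through RREG32/WREG32-style wrappers
 * rather than calling these helpers directly.
 */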