treewide: remove redundant IS_ERR() before error code check
[linux/fpc-iii.git] / drivers / misc / habanalabs / habanalabs_drv.c
blob8c342fb499ca66d8a60154ede216d462e191e3e2
1 // SPDX-License-Identifier: GPL-2.0
3 /*
4 * Copyright 2016-2019 HabanaLabs, Ltd.
5 * All Rights Reserved.
7 */
9 #define pr_fmt(fmt) "habanalabs: " fmt
11 #include "habanalabs.h"
13 #include <linux/pci.h>
14 #include <linux/module.h>
16 #define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
18 #define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators"
20 MODULE_AUTHOR(HL_DRIVER_AUTHOR);
21 MODULE_DESCRIPTION(HL_DRIVER_DESC);
22 MODULE_LICENSE("GPL v2");
24 static int hl_major;
25 static struct class *hl_class;
26 static DEFINE_IDR(hl_devs_idr);
27 static DEFINE_MUTEX(hl_devs_idr_lock);
29 static int timeout_locked = 5;
30 static int reset_on_lockup = 1;
32 module_param(timeout_locked, int, 0444);
33 MODULE_PARM_DESC(timeout_locked,
34 "Device lockup timeout in seconds (0 = disabled, default 5s)");
36 module_param(reset_on_lockup, int, 0444);
37 MODULE_PARM_DESC(reset_on_lockup,
38 "Do device reset on lockup (0 = no, 1 = yes, default yes)");
40 #define PCI_VENDOR_ID_HABANALABS 0x1da3
42 #define PCI_IDS_GOYA 0x0001
44 static const struct pci_device_id ids[] = {
45 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
46 { 0, }
48 MODULE_DEVICE_TABLE(pci, ids);
51 * get_asic_type - translate device id to asic type
53 * @device: id of the PCI device
55 * Translate device id to asic type.
56 * In case of unidentified device, return -1
58 static enum hl_asic_type get_asic_type(u16 device)
60 enum hl_asic_type asic_type;
62 switch (device) {
63 case PCI_IDS_GOYA:
64 asic_type = ASIC_GOYA;
65 break;
66 default:
67 asic_type = ASIC_INVALID;
68 break;
71 return asic_type;
75 * hl_device_open - open function for habanalabs device
77 * @inode: pointer to inode structure
78 * @filp: pointer to file structure
80 * Called when process opens an habanalabs device.
82 int hl_device_open(struct inode *inode, struct file *filp)
84 struct hl_device *hdev;
85 struct hl_fpriv *hpriv;
86 int rc;
88 mutex_lock(&hl_devs_idr_lock);
89 hdev = idr_find(&hl_devs_idr, iminor(inode));
90 mutex_unlock(&hl_devs_idr_lock);
92 if (!hdev) {
93 pr_err("Couldn't find device %d:%d\n",
94 imajor(inode), iminor(inode));
95 return -ENXIO;
98 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
99 if (!hpriv)
100 return -ENOMEM;
102 hpriv->hdev = hdev;
103 filp->private_data = hpriv;
104 hpriv->filp = filp;
105 mutex_init(&hpriv->restore_phase_mutex);
106 kref_init(&hpriv->refcount);
107 nonseekable_open(inode, filp);
109 hl_cb_mgr_init(&hpriv->cb_mgr);
110 hl_ctx_mgr_init(&hpriv->ctx_mgr);
112 hpriv->taskpid = find_get_pid(current->pid);
114 mutex_lock(&hdev->fpriv_list_lock);
116 if (hl_device_disabled_or_in_reset(hdev)) {
117 dev_err_ratelimited(hdev->dev,
118 "Can't open %s because it is disabled or in reset\n",
119 dev_name(hdev->dev));
120 rc = -EPERM;
121 goto out_err;
124 if (hdev->in_debug) {
125 dev_err_ratelimited(hdev->dev,
126 "Can't open %s because it is being debugged by another user\n",
127 dev_name(hdev->dev));
128 rc = -EPERM;
129 goto out_err;
132 if (hdev->compute_ctx) {
133 dev_dbg_ratelimited(hdev->dev,
134 "Can't open %s because another user is working on it\n",
135 dev_name(hdev->dev));
136 rc = -EBUSY;
137 goto out_err;
140 rc = hl_ctx_create(hdev, hpriv);
141 if (rc) {
142 dev_err(hdev->dev, "Failed to create context %d\n", rc);
143 goto out_err;
146 /* Device is IDLE at this point so it is legal to change PLLs.
147 * There is no need to check anything because if the PLL is
148 * already HIGH, the set function will return without doing
149 * anything
151 hl_device_set_frequency(hdev, PLL_HIGH);
153 list_add(&hpriv->dev_node, &hdev->fpriv_list);
154 mutex_unlock(&hdev->fpriv_list_lock);
156 hl_debugfs_add_file(hpriv);
158 return 0;
160 out_err:
161 mutex_unlock(&hdev->fpriv_list_lock);
163 hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
164 hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
165 filp->private_data = NULL;
166 mutex_destroy(&hpriv->restore_phase_mutex);
167 put_pid(hpriv->taskpid);
169 kfree(hpriv);
170 return rc;
173 int hl_device_open_ctrl(struct inode *inode, struct file *filp)
175 struct hl_device *hdev;
176 struct hl_fpriv *hpriv;
177 int rc;
179 mutex_lock(&hl_devs_idr_lock);
180 hdev = idr_find(&hl_devs_idr, iminor(inode));
181 mutex_unlock(&hl_devs_idr_lock);
183 if (!hdev) {
184 pr_err("Couldn't find device %d:%d\n",
185 imajor(inode), iminor(inode));
186 return -ENXIO;
189 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
190 if (!hpriv)
191 return -ENOMEM;
193 mutex_lock(&hdev->fpriv_list_lock);
195 if (hl_device_disabled_or_in_reset(hdev)) {
196 dev_err_ratelimited(hdev->dev_ctrl,
197 "Can't open %s because it is disabled or in reset\n",
198 dev_name(hdev->dev_ctrl));
199 rc = -EPERM;
200 goto out_err;
203 list_add(&hpriv->dev_node, &hdev->fpriv_list);
204 mutex_unlock(&hdev->fpriv_list_lock);
206 hpriv->hdev = hdev;
207 filp->private_data = hpriv;
208 hpriv->filp = filp;
209 hpriv->is_control = true;
210 nonseekable_open(inode, filp);
212 hpriv->taskpid = find_get_pid(current->pid);
214 return 0;
216 out_err:
217 mutex_unlock(&hdev->fpriv_list_lock);
218 kfree(hpriv);
219 return rc;
222 static void set_driver_behavior_per_device(struct hl_device *hdev)
224 hdev->mmu_enable = 1;
225 hdev->cpu_enable = 1;
226 hdev->fw_loading = 1;
227 hdev->cpu_queues_enable = 1;
228 hdev->heartbeat = 1;
230 hdev->reset_pcilink = 0;
234 * create_hdev - create habanalabs device instance
236 * @dev: will hold the pointer to the new habanalabs device structure
237 * @pdev: pointer to the pci device
238 * @asic_type: in case of simulator device, which device is it
239 * @minor: in case of simulator device, the minor of the device
241 * Allocate memory for habanalabs device and initialize basic fields
242 * Identify the ASIC type
243 * Allocate ID (minor) for the device (only for real devices)
245 int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
246 enum hl_asic_type asic_type, int minor)
248 struct hl_device *hdev;
249 int rc, main_id, ctrl_id = 0;
251 *dev = NULL;
253 hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
254 if (!hdev)
255 return -ENOMEM;
257 /* First, we must find out which ASIC are we handling. This is needed
258 * to configure the behavior of the driver (kernel parameters)
260 if (pdev) {
261 hdev->asic_type = get_asic_type(pdev->device);
262 if (hdev->asic_type == ASIC_INVALID) {
263 dev_err(&pdev->dev, "Unsupported ASIC\n");
264 rc = -ENODEV;
265 goto free_hdev;
267 } else {
268 hdev->asic_type = asic_type;
271 hdev->major = hl_major;
272 hdev->reset_on_lockup = reset_on_lockup;
273 hdev->pldm = 0;
275 set_driver_behavior_per_device(hdev);
277 if (timeout_locked)
278 hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
279 else
280 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
282 hdev->disabled = true;
283 hdev->pdev = pdev; /* can be NULL in case of simulator device */
285 /* Set default DMA mask to 32 bits */
286 hdev->dma_mask = 32;
288 mutex_lock(&hl_devs_idr_lock);
290 /* Always save 2 numbers, 1 for main device and 1 for control.
291 * They must be consecutive
293 main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
294 GFP_KERNEL);
296 if (main_id >= 0)
297 ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
298 main_id + 2, GFP_KERNEL);
300 mutex_unlock(&hl_devs_idr_lock);
302 if ((main_id < 0) || (ctrl_id < 0)) {
303 if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
304 pr_err("too many devices in the system\n");
306 if (main_id >= 0) {
307 mutex_lock(&hl_devs_idr_lock);
308 idr_remove(&hl_devs_idr, main_id);
309 mutex_unlock(&hl_devs_idr_lock);
312 rc = -EBUSY;
313 goto free_hdev;
316 hdev->id = main_id;
317 hdev->id_control = ctrl_id;
319 *dev = hdev;
321 return 0;
323 free_hdev:
324 kfree(hdev);
325 return rc;
329 * destroy_hdev - destroy habanalabs device instance
331 * @dev: pointer to the habanalabs device structure
334 void destroy_hdev(struct hl_device *hdev)
336 /* Remove device from the device list */
337 mutex_lock(&hl_devs_idr_lock);
338 idr_remove(&hl_devs_idr, hdev->id);
339 idr_remove(&hl_devs_idr, hdev->id_control);
340 mutex_unlock(&hl_devs_idr_lock);
342 kfree(hdev);
/*
 * PM suspend callback: forwards to hl_device_suspend() for the device stored
 * as driver data. A missing device is logged and reported as success (0).
 */
static int hl_pmops_suspend(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to suspend PCI device\n");

	if (hdev)
		return hl_device_suspend(hdev);

	pr_err("device pointer is NULL in suspend\n");
	return 0;
}
/*
 * PM resume callback: forwards to hl_device_resume() for the device stored
 * as driver data. A missing device is logged and reported as success (0).
 */
static int hl_pmops_resume(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to resume PCI device\n");

	if (hdev)
		return hl_device_resume(hdev);

	pr_err("device pointer is NULL in resume\n");
	return 0;
}
374 * hl_pci_probe - probe PCI habanalabs devices
376 * @pdev: pointer to pci device
377 * @id: pointer to pci device id structure
379 * Standard PCI probe function for habanalabs device.
380 * Create a new habanalabs device and initialize it according to the
381 * device's type
383 static int hl_pci_probe(struct pci_dev *pdev,
384 const struct pci_device_id *id)
386 struct hl_device *hdev;
387 int rc;
389 dev_info(&pdev->dev, HL_NAME
390 " device found [%04x:%04x] (rev %x)\n",
391 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
393 rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
394 if (rc)
395 return rc;
397 pci_set_drvdata(pdev, hdev);
399 rc = hl_device_init(hdev, hl_class);
400 if (rc) {
401 dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
402 rc = -ENODEV;
403 goto disable_device;
406 return 0;
408 disable_device:
409 pci_set_drvdata(pdev, NULL);
410 destroy_hdev(hdev);
412 return rc;
416 * hl_pci_remove - remove PCI habanalabs devices
418 * @pdev: pointer to pci device
420 * Standard PCI remove function for habanalabs device
422 static void hl_pci_remove(struct pci_dev *pdev)
424 struct hl_device *hdev;
426 hdev = pci_get_drvdata(pdev);
427 if (!hdev)
428 return;
430 hl_device_fini(hdev);
431 pci_set_drvdata(pdev, NULL);
433 destroy_hdev(hdev);
436 static const struct dev_pm_ops hl_pm_ops = {
437 .suspend = hl_pmops_suspend,
438 .resume = hl_pmops_resume,
441 static struct pci_driver hl_pci_driver = {
442 .name = HL_NAME,
443 .id_table = ids,
444 .probe = hl_pci_probe,
445 .remove = hl_pci_remove,
446 .driver.pm = &hl_pm_ops,
450 * hl_init - Initialize the habanalabs kernel driver
452 static int __init hl_init(void)
454 int rc;
455 dev_t dev;
457 pr_info("loading driver\n");
459 rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
460 if (rc < 0) {
461 pr_err("unable to get major\n");
462 return rc;
465 hl_major = MAJOR(dev);
467 hl_class = class_create(THIS_MODULE, HL_NAME);
468 if (IS_ERR(hl_class)) {
469 pr_err("failed to allocate class\n");
470 rc = PTR_ERR(hl_class);
471 goto remove_major;
474 hl_debugfs_init();
476 rc = pci_register_driver(&hl_pci_driver);
477 if (rc) {
478 pr_err("failed to register pci device\n");
479 goto remove_debugfs;
482 pr_debug("driver loaded\n");
484 return 0;
486 remove_debugfs:
487 hl_debugfs_fini();
488 class_destroy(hl_class);
489 remove_major:
490 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
491 return rc;
495 * hl_exit - Release all resources of the habanalabs kernel driver
497 static void __exit hl_exit(void)
499 pci_unregister_driver(&hl_pci_driver);
502 * Removing debugfs must be after all devices or simulator devices
503 * have been removed because otherwise we get a bug in the
504 * debugfs module for referencing NULL objects
506 hl_debugfs_fini();
508 class_destroy(hl_class);
509 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
511 idr_destroy(&hl_devs_idr);
513 pr_debug("driver removed\n");
/* Module entry and exit points */
module_init(hl_init);
module_exit(hl_exit);