1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2019 HabanaLabs, Ltd.
9 #define pr_fmt(fmt) "habanalabs: " fmt
11 #include "habanalabs.h"
13 #include <linux/pci.h>
14 #include <linux/module.h>
// Author and description strings, registered just below as module metadata
// together with the GPL v2 license tag.
16 #define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
18 #define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators"
20 MODULE_AUTHOR(HL_DRIVER_AUTHOR
);
21 MODULE_DESCRIPTION(HL_DRIVER_DESC
);
22 MODULE_LICENSE("GPL v2");
// Device class handle: assigned in hl_init() and handed to hl_device_init()
// by the PCI probe routine.
25 static struct class *hl_class
;
// IDR mapping minor numbers to hl_device pointers, and the mutex held around
// the lookups, allocations and removals performed on it in this file.
26 static DEFINE_IDR(hl_devs_idr
);
27 static DEFINE_MUTEX(hl_devs_idr_lock
);
// Backing variables for the two module parameters, with their defaults.
29 static int timeout_locked
= 5;
30 static int reset_on_lockup
= 1;
// Both parameters are registered with mode 0444.
32 module_param(timeout_locked
, int, 0444);
33 MODULE_PARM_DESC(timeout_locked
,
34 "Device lockup timeout in seconds (0 = disabled, default 5s)");
36 module_param(reset_on_lockup
, int, 0444);
37 MODULE_PARM_DESC(reset_on_lockup
,
38 "Do device reset on lockup (0 = no, 1 = yes, default yes)");
// PCI vendor id and the Goya device id used to build the match table.
40 #define PCI_VENDOR_ID_HABANALABS 0x1da3
42 #define PCI_IDS_GOYA 0x0001
// PCI match table, exported via MODULE_DEVICE_TABLE(pci, ids).
// NOTE(review): only the Goya entry is visible in this listing; the table
// terminator and closing brace are not shown - confirm against the real file.
44 static const struct pci_device_id ids
[] = {
45 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS
, PCI_IDS_GOYA
), },
48 MODULE_DEVICE_TABLE(pci
, ids
);
51 * get_asic_type - translate device id to asic type
53 * @device: id of the PCI device
55 * Translate device id to asic type.
56 * In case of an unidentified device, return ASIC_INVALID
// Maps a PCI device id (u16) to a value of enum hl_asic_type.
// The two visible assignments select ASIC_GOYA or ASIC_INVALID.
// NOTE(review): the branching on the device argument is not visible in this
// listing - confirm which ids map to ASIC_GOYA before relying on this.
58 static enum hl_asic_type
get_asic_type(u16 device
)
60 enum hl_asic_type asic_type
;
64 asic_type
= ASIC_GOYA
;
67 asic_type
= ASIC_INVALID
;
75 * hl_device_open - open function for habanalabs device
77 * @inode: pointer to inode structure
78 * @filp: pointer to file structure
80 * Called when a process opens a habanalabs device.
// Open handler for the main device node. Finds the hl_device by minor
// number, allocates and initialises a per-open hl_fpriv, creates a context
// for it and links it on the device fpriv_list.
// NOTE(review): several conditions, labels and return statements are not
// visible in this listing; comments below describe only the visible calls.
82 int hl_device_open(struct inode
*inode
, struct file
*filp
)
84 struct hl_device
*hdev
;
85 struct hl_fpriv
*hpriv
;
// Look the device up by the inode minor number, under the IDR lock.
88 mutex_lock(&hl_devs_idr_lock
);
89 hdev
= idr_find(&hl_devs_idr
, iminor(inode
));
90 mutex_unlock(&hl_devs_idr_lock
);
// A failed lookup is reported with the major:minor pair of the inode.
93 pr_err("Couldn't find device %d:%d\n",
94 imajor(inode
), iminor(inode
));
// Zero-initialised per-open private data, stored in filp->private_data.
98 hpriv
= kzalloc(sizeof(*hpriv
), GFP_KERNEL
);
103 filp
->private_data
= hpriv
;
105 mutex_init(&hpriv
->restore_phase_mutex
);
106 kref_init(&hpriv
->refcount
);
107 nonseekable_open(inode
, filp
);
// Initialise the per-open command-buffer and context managers.
109 hl_cb_mgr_init(&hpriv
->cb_mgr
);
110 hl_ctx_mgr_init(&hpriv
->ctx_mgr
);
// Take a reference on the pid of the opening task; released by put_pid()
// in the teardown sequence at the end of this function.
112 hpriv
->taskpid
= find_get_pid(current
->pid
);
// The admission checks and the list insertion run under fpriv_list_lock.
114 mutex_lock(&hdev
->fpriv_list_lock
);
// Refuse the open while the device is disabled or in reset.
116 if (hl_device_disabled_or_in_reset(hdev
)) {
117 dev_err_ratelimited(hdev
->dev
,
118 "Can't open %s because it is disabled or in reset\n",
119 dev_name(hdev
->dev
));
// Refuse the open while the in_debug flag is set on the device.
124 if (hdev
->in_debug
) {
125 dev_err_ratelimited(hdev
->dev
,
126 "Can't open %s because it is being debugged by another user\n",
127 dev_name(hdev
->dev
));
// Refuse the open while compute_ctx is already set; this case is logged at
// debug level only, unlike the two checks above.
132 if (hdev
->compute_ctx
) {
133 dev_dbg_ratelimited(hdev
->dev
,
134 "Can't open %s because another user is working on it\n",
135 dev_name(hdev
->dev
));
// Create the context for this open; the failure message includes rc.
140 rc
= hl_ctx_create(hdev
, hpriv
);
142 dev_err(hdev
->dev
, "Failed to create context %d\n", rc
);
146 /* Device is IDLE at this point so it is legal to change PLLs.
147 * There is no need to check anything because if the PLL is
148 * already HIGH, the set function will return without doing
151 hl_device_set_frequency(hdev
, PLL_HIGH
);
// Link the new hpriv on the device list, then drop the list lock.
153 list_add(&hpriv
->dev_node
, &hdev
->fpriv_list
);
154 mutex_unlock(&hdev
->fpriv_list_lock
);
156 hl_debugfs_add_file(hpriv
);
// Teardown sequence: drops the list lock and undoes the initialisation done
// above (managers, private_data, mutex, pid reference).
// NOTE(review): presumably the shared error path reached from the failed
// checks above - its label and the final free/return are not visible here.
161 mutex_unlock(&hdev
->fpriv_list_lock
);
163 hl_cb_mgr_fini(hpriv
->hdev
, &hpriv
->cb_mgr
);
164 hl_ctx_mgr_fini(hpriv
->hdev
, &hpriv
->ctx_mgr
);
165 filp
->private_data
= NULL
;
166 mutex_destroy(&hpriv
->restore_phase_mutex
);
167 put_pid(hpriv
->taskpid
);
// Open handler for the control device node. Finds the hl_device by minor
// number, allocates a per-open hl_fpriv marked is_control, and links it on
// the device fpriv_list. Log messages here use hdev->dev_ctrl.
// NOTE(review): conditions, labels and return statements are not visible in
// this listing; comments below describe only the visible calls.
173 int hl_device_open_ctrl(struct inode
*inode
, struct file
*filp
)
175 struct hl_device
*hdev
;
176 struct hl_fpriv
*hpriv
;
// Look the device up by the inode minor number, under the IDR lock.
179 mutex_lock(&hl_devs_idr_lock
);
180 hdev
= idr_find(&hl_devs_idr
, iminor(inode
));
181 mutex_unlock(&hl_devs_idr_lock
);
// A failed lookup is reported with the major:minor pair of the inode.
184 pr_err("Couldn't find device %d:%d\n",
185 imajor(inode
), iminor(inode
));
// Zero-initialised per-open private data.
189 hpriv
= kzalloc(sizeof(*hpriv
), GFP_KERNEL
);
193 mutex_lock(&hdev
->fpriv_list_lock
);
// Refuse the open while the device is disabled or in reset.
195 if (hl_device_disabled_or_in_reset(hdev
)) {
196 dev_err_ratelimited(hdev
->dev_ctrl
,
197 "Can't open %s because it is disabled or in reset\n",
198 dev_name(hdev
->dev_ctrl
));
// NOTE(review): hpriv is linked on fpriv_list and the lock is released
// before private_data, is_control and taskpid are assigned below, so code
// walking the list could observe a partially initialised hpriv. Consider
// initialising the fields before the insertion - confirm with list users.
203 list_add(&hpriv
->dev_node
, &hdev
->fpriv_list
);
204 mutex_unlock(&hdev
->fpriv_list_lock
);
207 filp
->private_data
= hpriv
;
// Marks this open as belonging to the control node.
209 hpriv
->is_control
= true;
210 nonseekable_open(inode
, filp
);
// Take a reference on the pid of the opening task.
212 hpriv
->taskpid
= find_get_pid(current
->pid
);
// Second unlock of fpriv_list_lock.
// NOTE(review): presumably the error path for the refused open above - its
// label and the free/return that follow are not visible in this listing.
217 mutex_unlock(&hdev
->fpriv_list_lock
);
// Sets fixed behaviour flags on the device: mmu_enable, cpu_enable,
// fw_loading and cpu_queues_enable are set to 1, reset_pcilink to 0.
222 static void set_driver_behavior_per_device(struct hl_device
*hdev
)
224 hdev
->mmu_enable
= 1;
225 hdev
->cpu_enable
= 1;
226 hdev
->fw_loading
= 1;
227 hdev
->cpu_queues_enable
= 1;
230 hdev
->reset_pcilink
= 0;
234 * create_hdev - create habanalabs device instance
236 * @dev: will hold the pointer to the new habanalabs device structure
237 * @pdev: pointer to the pci device
238 * @asic_type: in case of simulator device, which device is it
239 * @minor: in case of simulator device, the minor of the device
241 * Allocate memory for habanalabs device and initialize basic fields
242 * Identify the ASIC type
243 * Allocate ID (minor) for the device (only for real devices)
// Allocates a zeroed hl_device, fills in basic fields (asic type, major,
// lockup behaviour from the module parameters, pdev) and reserves two ids
// in hl_devs_idr: one for the main device and one for the control device.
// NOTE(review): guard conditions, error returns and the store through the
// dev output argument are not visible in this listing.
245 int create_hdev(struct hl_device
**dev
, struct pci_dev
*pdev
,
246 enum hl_asic_type asic_type
, int minor
)
248 struct hl_device
*hdev
;
249 int rc
, main_id
, ctrl_id
= 0;
253 hdev
= kzalloc(sizeof(*hdev
), GFP_KERNEL
);
257 /* First, we must find out which ASIC are we handling. This is needed
258 * to configure the behavior of the driver (kernel parameters)
// Asic type derived from the PCI device id; ASIC_INVALID is reported as an
// unsupported ASIC.
261 hdev
->asic_type
= get_asic_type(pdev
->device
);
262 if (hdev
->asic_type
== ASIC_INVALID
) {
263 dev_err(&pdev
->dev
, "Unsupported ASIC\n");
// Alternative assignment taken from the asic_type argument.
// NOTE(review): the condition choosing between the two assignments is not
// visible - presumably the simulator case described in the header comment.
268 hdev
->asic_type
= asic_type
;
271 hdev
->major
= hl_major
;
272 hdev
->reset_on_lockup
= reset_on_lockup
;
275 set_driver_behavior_per_device(hdev
);
// Lockup timeout: timeout_locked seconds converted to jiffies, or
// MAX_SCHEDULE_TIMEOUT.
// NOTE(review): presumably the latter applies when timeout_locked is 0
// (documented as disabled) - the selecting condition is not visible.
278 hdev
->timeout_jiffies
= msecs_to_jiffies(timeout_locked
* 1000);
280 hdev
->timeout_jiffies
= MAX_SCHEDULE_TIMEOUT
;
// The device starts out marked as disabled.
282 hdev
->disabled
= true;
283 hdev
->pdev
= pdev
; /* can be NULL in case of simulator device */
285 /* Set default DMA mask to 32 bits */
// Both ids are allocated while holding the IDR lock.
288 mutex_lock(&hl_devs_idr_lock
);
290 /* Always save 2 numbers, 1 for main device and 1 for control.
291 * They must be consecutive
293 main_id
= idr_alloc(&hl_devs_idr
, hdev
, 0, HL_MAX_MINORS
,
// The control id is requested from the range starting at main_id + 1 and
// ending before main_id + 2, so it can only be main_id + 1.
297 ctrl_id
= idr_alloc(&hl_devs_idr
, hdev
, main_id
+ 1,
298 main_id
+ 2, GFP_KERNEL
);
300 mutex_unlock(&hl_devs_idr_lock
);
// Failure of either allocation: -ENOSPC is reported as too many devices,
// and the visible cleanup removes main_id from the IDR again.
302 if ((main_id
< 0) || (ctrl_id
< 0)) {
303 if ((main_id
== -ENOSPC
) || (ctrl_id
== -ENOSPC
))
304 pr_err("too many devices in the system\n");
307 mutex_lock(&hl_devs_idr_lock
);
308 idr_remove(&hl_devs_idr
, main_id
);
309 mutex_unlock(&hl_devs_idr_lock
);
// Record the control id on the device; destroy_hdev() removes it later.
317 hdev
->id_control
= ctrl_id
;
329 * destroy_hdev - destroy habanalabs device instance
331 * @hdev: pointer to the habanalabs device structure
// Removes both ids of the device (hdev->id and hdev->id_control) from
// hl_devs_idr while holding the IDR lock.
// NOTE(review): any freeing of hdev itself is not visible in this listing.
334 void destroy_hdev(struct hl_device
*hdev
)
336 /* Remove device from the device list */
337 mutex_lock(&hl_devs_idr_lock
);
338 idr_remove(&hl_devs_idr
, hdev
->id
);
339 idr_remove(&hl_devs_idr
, hdev
->id_control
);
340 mutex_unlock(&hl_devs_idr_lock
);
// PM suspend callback: fetches the hl_device from the driver data of dev
// and returns the result of hl_device_suspend().
345 static int hl_pmops_suspend(struct device
*dev
)
347 struct hl_device
*hdev
= dev_get_drvdata(dev
);
349 pr_debug("Going to suspend PCI device\n");
// NOTE(review): the message implies a NULL check on hdev; the check and its
// return value are not visible in this listing.
352 pr_err("device pointer is NULL in suspend\n");
356 return hl_device_suspend(hdev
);
// PM resume callback: fetches the hl_device from the driver data of dev
// and returns the result of hl_device_resume().
359 static int hl_pmops_resume(struct device
*dev
)
361 struct hl_device
*hdev
= dev_get_drvdata(dev
);
363 pr_debug("Going to resume PCI device\n");
// NOTE(review): the message implies a NULL check on hdev; the check and its
// return value are not visible in this listing.
366 pr_err("device pointer is NULL in resume\n");
370 return hl_device_resume(hdev
);
374 * hl_pci_probe - probe PCI habanalabs devices
376 * @pdev: pointer to pci device
377 * @id: pointer to pci device id structure
379 * Standard PCI probe function for habanalabs device.
380 * Create a new habanalabs device and initialize it according to the
// PCI probe callback: logs the vendor, device and revision ids, creates the
// hl_device, stores it as the PCI driver data and runs hl_device_init().
// NOTE(review): error checks and return statements are not visible in this
// listing.
383 static int hl_pci_probe(struct pci_dev
*pdev
,
384 const struct pci_device_id
*id
)
386 struct hl_device
*hdev
;
389 dev_info(&pdev
->dev
, HL_NAME
390 " device found [%04x:%04x] (rev %x)\n",
391 (int)pdev
->vendor
, (int)pdev
->device
, (int)pdev
->revision
);
// ASIC_INVALID and -1 are passed for the asic_type and minor arguments,
// which the create_hdev() header documents as simulator-only inputs.
393 rc
= create_hdev(&hdev
, pdev
, ASIC_INVALID
, -1);
397 pci_set_drvdata(pdev
, hdev
);
399 rc
= hl_device_init(hdev
, hl_class
);
401 dev_err(&pdev
->dev
, "Fatal error during habanalabs device init\n");
// Driver data is cleared again here.
// NOTE(review): presumably part of the init-failure path - confirm.
409 pci_set_drvdata(pdev
, NULL
);
416 * hl_pci_remove - remove PCI habanalabs devices
418 * @pdev: pointer to pci device
420 * Standard PCI remove function for habanalabs device
// PCI remove callback: fetches the hl_device from the PCI driver data,
// runs hl_device_fini() on it and clears the driver data.
// NOTE(review): any NULL check on hdev and any call to destroy_hdev() are
// not visible in this listing.
422 static void hl_pci_remove(struct pci_dev
*pdev
)
424 struct hl_device
*hdev
;
426 hdev
= pci_get_drvdata(pdev
);
430 hl_device_fini(hdev
);
431 pci_set_drvdata(pdev
, NULL
);
// Power-management callbacks: suspend and resume are wired to the two
// hl_pmops_* handlers defined in this file.
436 static const struct dev_pm_ops hl_pm_ops
= {
437 .suspend
= hl_pmops_suspend
,
438 .resume
= hl_pmops_resume
,
// PCI driver descriptor: probe and remove callbacks plus the PM ops above.
// NOTE(review): the name and id_table initialisers are not visible in this
// listing - confirm they are present in the real file.
441 static struct pci_driver hl_pci_driver
= {
444 .probe
= hl_pci_probe
,
445 .remove
= hl_pci_remove
,
446 .driver
.pm
= &hl_pm_ops
,
450 * hl_init - Initialize the habanalabs kernel driver
// Module init: reserves a char-device region of HL_MAX_MINORS minors under
// the name HL_NAME, records the major in hl_major, creates the device class
// and registers the PCI driver.
// NOTE(review): declarations, error checks, labels and return statements
// are not visible in this listing.
452 static int __init
hl_init(void)
457 pr_info("loading driver\n");
459 rc
= alloc_chrdev_region(&dev
, 0, HL_MAX_MINORS
, HL_NAME
);
461 pr_err("unable to get major\n");
465 hl_major
= MAJOR(dev
);
467 hl_class
= class_create(THIS_MODULE
, HL_NAME
);
// class_create() reports failure through an error pointer, converted to rc.
468 if (IS_ERR(hl_class
)) {
469 pr_err("failed to allocate class\n");
470 rc
= PTR_ERR(hl_class
);
476 rc
= pci_register_driver(&hl_pci_driver
);
478 pr_err("failed to register pci device\n");
482 pr_debug("driver loaded\n");
// Cleanup calls: destroy the class and release the char-device region.
// NOTE(review): presumably the unwind labels for the failures above.
488 class_destroy(hl_class
);
490 unregister_chrdev_region(MKDEV(hl_major
, 0), HL_MAX_MINORS
);
495 * hl_exit - Release all resources of the habanalabs kernel driver
// Module exit: unregisters the PCI driver, destroys the device class,
// releases the char-device region and destroys the IDR.
497 static void __exit
hl_exit(void)
499 pci_unregister_driver(&hl_pci_driver
);
// NOTE(review): the debugfs teardown call that the next comment refers to
// is not visible in this listing.
502 * Removing debugfs must be after all devices or simulator devices
503 * have been removed because otherwise we get a bug in the
504 * debugfs module for referencing NULL objects
508 class_destroy(hl_class
);
509 unregister_chrdev_region(MKDEV(hl_major
, 0), HL_MAX_MINORS
);
511 idr_destroy(&hl_devs_idr
);
513 pr_debug("driver removed\n");
// Register hl_init and hl_exit as the module entry and exit points.
516 module_init(hl_init
);
517 module_exit(hl_exit
);