Skip to content

Commit 3b9abb4

Browse files
Tomer Tayar authored and ogabbay committed
accel/habanalabs: expose debugfs files later
Currently the debugfs root folder and files for a device are created at an early step, before the device initialization and before the char device and sysfs files are exposed to user.
As there is no real reason not to do it together with the device creation, postpone it to be done right afterwards.
The initialization of the debugfs entry structure is left in its current position because it is used before creating the files.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
1 parent d8b9cea commit 3b9abb4

3 files changed

Lines changed: 61 additions & 44 deletions

File tree

drivers/accel/habanalabs/common/debugfs.c

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1756,17 +1756,15 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
17561756
}
17571757
}
17581758

1759-
void hl_debugfs_add_device(struct hl_device *hdev)
1759+
int hl_debugfs_device_init(struct hl_device *hdev)
17601760
{
17611761
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
17621762
int count = ARRAY_SIZE(hl_debugfs_list);
17631763

17641764
dev_entry->hdev = hdev;
1765-
dev_entry->entry_arr = kmalloc_array(count,
1766-
sizeof(struct hl_debugfs_entry),
1767-
GFP_KERNEL);
1765+
dev_entry->entry_arr = kmalloc_array(count, sizeof(struct hl_debugfs_entry), GFP_KERNEL);
17681766
if (!dev_entry->entry_arr)
1769-
return;
1767+
return -ENOMEM;
17701768

17711769
dev_entry->data_dma_blob_desc.size = 0;
17721770
dev_entry->data_dma_blob_desc.data = NULL;
@@ -1787,21 +1785,14 @@ void hl_debugfs_add_device(struct hl_device *hdev)
17871785
spin_lock_init(&dev_entry->userptr_spinlock);
17881786
mutex_init(&dev_entry->ctx_mem_hash_mutex);
17891787

1790-
dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
1791-
hl_debug_root);
1792-
1793-
add_files_to_device(hdev, dev_entry, dev_entry->root);
1794-
if (!hdev->asic_prop.fw_security_enabled)
1795-
add_secured_nodes(dev_entry, dev_entry->root);
1788+
return 0;
17961789
}
17971790

1798-
void hl_debugfs_remove_device(struct hl_device *hdev)
1791+
void hl_debugfs_device_fini(struct hl_device *hdev)
17991792
{
18001793
struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
18011794
int i;
18021795

1803-
debugfs_remove_recursive(entry->root);
1804-
18051796
mutex_destroy(&entry->ctx_mem_hash_mutex);
18061797
mutex_destroy(&entry->file_mutex);
18071798

@@ -1814,6 +1805,24 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
18141805
kfree(entry->entry_arr);
18151806
}
18161807

1808+
void hl_debugfs_add_device(struct hl_device *hdev)
1809+
{
1810+
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
1811+
1812+
dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root);
1813+
1814+
add_files_to_device(hdev, dev_entry, dev_entry->root);
1815+
if (!hdev->asic_prop.fw_security_enabled)
1816+
add_secured_nodes(dev_entry, dev_entry->root);
1817+
}
1818+
1819+
void hl_debugfs_remove_device(struct hl_device *hdev)
1820+
{
1821+
struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
1822+
1823+
debugfs_remove_recursive(entry->root);
1824+
}
1825+
18171826
void hl_debugfs_add_file(struct hl_fpriv *hpriv)
18181827
{
18191828
struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;

drivers/accel/habanalabs/common/device.c

Lines changed: 34 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -674,7 +674,7 @@ static int device_init_cdev(struct hl_device *hdev, struct class *class,
674674
return 0;
675675
}
676676

677-
static int device_cdev_sysfs_add(struct hl_device *hdev)
677+
static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
678678
{
679679
int rc;
680680

@@ -699,7 +699,9 @@ static int device_cdev_sysfs_add(struct hl_device *hdev)
699699
goto delete_ctrl_cdev_device;
700700
}
701701

702-
hdev->cdev_sysfs_created = true;
702+
hl_debugfs_add_device(hdev);
703+
704+
hdev->cdev_sysfs_debugfs_created = true;
703705

704706
return 0;
705707

@@ -710,11 +712,12 @@ static int device_cdev_sysfs_add(struct hl_device *hdev)
710712
return rc;
711713
}
712714

713-
static void device_cdev_sysfs_del(struct hl_device *hdev)
715+
static void cdev_sysfs_debugfs_remove(struct hl_device *hdev)
714716
{
715-
if (!hdev->cdev_sysfs_created)
717+
if (!hdev->cdev_sysfs_debugfs_created)
716718
goto put_devices;
717719

720+
hl_debugfs_remove_device(hdev);
718721
hl_sysfs_fini(hdev);
719722
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
720723
cdev_device_del(&hdev->cdev, hdev->dev);
@@ -2054,7 +2057,7 @@ static int create_cdev(struct hl_device *hdev)
20542057
int hl_device_init(struct hl_device *hdev)
20552058
{
20562059
int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
2057-
bool add_cdev_sysfs_on_err = false;
2060+
bool expose_interfaces_on_err = false;
20582061

20592062
rc = create_cdev(hdev);
20602063
if (rc)
@@ -2170,16 +2173,22 @@ int hl_device_init(struct hl_device *hdev)
21702173
hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC;
21712174

21722175
hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL;
2173-
hl_debugfs_add_device(hdev);
21742176

2175-
/* debugfs nodes are created in hl_ctx_init so it must be called after
2176-
* hl_debugfs_add_device.
2177+
rc = hl_debugfs_device_init(hdev);
2178+
if (rc) {
2179+
dev_err(hdev->dev, "failed to initialize debugfs entry structure\n");
2180+
kfree(hdev->kernel_ctx);
2181+
goto mmu_fini;
2182+
}
2183+
2184+
/* The debugfs entry structure is accessed in hl_ctx_init(), so it must be called after
2185+
* hl_debugfs_device_init().
21772186
*/
21782187
rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
21792188
if (rc) {
21802189
dev_err(hdev->dev, "failed to initialize kernel context\n");
21812190
kfree(hdev->kernel_ctx);
2182-
goto remove_device_from_debugfs;
2191+
goto debugfs_device_fini;
21832192
}
21842193

21852194
rc = hl_cb_pool_init(hdev);
@@ -2195,11 +2204,10 @@ int hl_device_init(struct hl_device *hdev)
21952204
}
21962205

21972206
/*
2198-
* From this point, override rc (=0) in case of an error to allow
2199-
* debugging (by adding char devices and create sysfs nodes as part of
2200-
* the error flow).
2207+
* From this point, override rc (=0) in case of an error to allow debugging
2208+
* (by adding char devices and creating sysfs/debugfs files as part of the error flow).
22012209
*/
2202-
add_cdev_sysfs_on_err = true;
2210+
expose_interfaces_on_err = true;
22032211

22042212
/* Device is now enabled as part of the initialization requires
22052213
* communication with the device firmware to get information that
@@ -2241,15 +2249,13 @@ int hl_device_init(struct hl_device *hdev)
22412249
}
22422250

22432251
/*
2244-
* Expose devices and sysfs nodes to user.
2245-
* From here there is no need to add char devices and create sysfs nodes
2246-
* in case of an error.
2252+
* Expose devices and sysfs/debugfs files to user.
2253+
* From here there is no need to expose them in case of an error.
22472254
*/
2248-
add_cdev_sysfs_on_err = false;
2249-
rc = device_cdev_sysfs_add(hdev);
2255+
expose_interfaces_on_err = false;
2256+
rc = cdev_sysfs_debugfs_add(hdev);
22502257
if (rc) {
2251-
dev_err(hdev->dev,
2252-
"Failed to add char devices and sysfs nodes\n");
2258+
dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n");
22532259
rc = 0;
22542260
goto out_disabled;
22552261
}
@@ -2295,8 +2301,8 @@ int hl_device_init(struct hl_device *hdev)
22952301
if (hl_ctx_put(hdev->kernel_ctx) != 1)
22962302
dev_err(hdev->dev,
22972303
"kernel ctx is still alive on initialization failure\n");
2298-
remove_device_from_debugfs:
2299-
hl_debugfs_remove_device(hdev);
2304+
debugfs_device_fini:
2305+
hl_debugfs_device_fini(hdev);
23002306
mmu_fini:
23012307
hl_mmu_fini(hdev);
23022308
eq_fini:
@@ -2320,8 +2326,8 @@ int hl_device_init(struct hl_device *hdev)
23202326
put_device(hdev->dev);
23212327
out_disabled:
23222328
hdev->disabled = true;
2323-
if (add_cdev_sysfs_on_err)
2324-
device_cdev_sysfs_add(hdev);
2329+
if (expose_interfaces_on_err)
2330+
cdev_sysfs_debugfs_add(hdev);
23252331
if (hdev->pdev)
23262332
dev_err(&hdev->pdev->dev,
23272333
"Failed to initialize hl%d. Device %s is NOT usable !\n",
@@ -2447,8 +2453,6 @@ void hl_device_fini(struct hl_device *hdev)
24472453
if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
24482454
dev_err(hdev->dev, "kernel ctx is still alive\n");
24492455

2450-
hl_debugfs_remove_device(hdev);
2451-
24522456
hl_dec_fini(hdev);
24532457

24542458
hl_vm_fini(hdev);
@@ -2473,8 +2477,10 @@ void hl_device_fini(struct hl_device *hdev)
24732477

24742478
device_early_fini(hdev);
24752479

2476-
/* Hide devices and sysfs nodes from user */
2477-
device_cdev_sysfs_del(hdev);
2480+
/* Hide devices and sysfs/debugfs files from user */
2481+
cdev_sysfs_debugfs_remove(hdev);
2482+
2483+
hl_debugfs_device_fini(hdev);
24782484

24792485
pr_info("removed device successfully\n");
24802486
}

drivers/accel/habanalabs/common/habanalabs.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3292,7 +3292,7 @@ struct hl_reset_info {
32923292
* @in_debug: whether the device is in a state where the profiling/tracing infrastructure
32933293
* can be used. This indication is needed because in some ASICs we need to do
32943294
* specific operations to enable that infrastructure.
3295-
* @cdev_sysfs_created: were char devices and sysfs nodes created.
3295+
* @cdev_sysfs_debugfs_created: were char devices and sysfs/debugfs files created.
32963296
* @stop_on_err: true if engines should stop on error.
32973297
* @supports_sync_stream: is sync stream supported.
32983298
* @sync_stream_queue_idx: helper index for sync stream queues initialization.
@@ -3459,7 +3459,7 @@ struct hl_device {
34593459
u8 init_done;
34603460
u8 device_cpu_disabled;
34613461
u8 in_debug;
3462-
u8 cdev_sysfs_created;
3462+
u8 cdev_sysfs_debugfs_created;
34633463
u8 stop_on_err;
34643464
u8 supports_sync_stream;
34653465
u8 sync_stream_queue_idx;
@@ -3978,6 +3978,8 @@ void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
39783978

39793979
void hl_debugfs_init(void);
39803980
void hl_debugfs_fini(void);
3981+
int hl_debugfs_device_init(struct hl_device *hdev);
3982+
void hl_debugfs_device_fini(struct hl_device *hdev);
39813983
void hl_debugfs_add_device(struct hl_device *hdev);
39823984
void hl_debugfs_remove_device(struct hl_device *hdev);
39833985
void hl_debugfs_add_file(struct hl_fpriv *hpriv);

0 commit comments

Comments
 (0)