Skip to content

Commit 4cb92fa

Browse files
Benjamin-Blockhcahca
authored andcommitted
s390/pci: Fix cyclic dead-lock in zpci_zdev_put() and zpci_scan_devices()
When triggering PCI device recovery by writing into the SysFS attribute `recover` of a Physical Function with existing child SR-IOV Virtual Functions, lockdep is reporting a possible deadlock between three threads: Thread (A) Thread (B) Thread (C) | | | recover_store() zpci_scan_devices() zpci_scan_devices() lock(pci_rescan_remove_lock) | | | | | | | zpci_bus_scan_busses() | | lock(zbus_list_lock) | zpci_add_device() | | lock(zpci_add_remove_lock) | | | ┴ | | zpci_bus_scan_bus() | | lock(pci_rescan_remove_lock) ┴ | zpci_zdev_put() | lock(zpci_add_remove_lock) | ┴ zpci_bus_get() lock(zbus_list_lock) In zpci_bus_scan_busses() the `zbus_list_lock` is taken for the whole duration of the function, which also includes taking `pci_rescan_remove_lock`, among other things. But `zbus_list_lock` only really needs to protect the modification of the global registration `zbus_list`, it can be dropped while the functions within the list iteration run; this way we break the cycle above. Break up zpci_bus_scan_busses() into an "iterator" zpci_bus_get_next() that iterates over `zbus_list` element by element, and acquires and releases `zbus_list_lock` as necessary, but never keep holding it. References to `zpci_bus` objects are also acquired and released. The reference counting on `zpci_bus` objects is also changed so that all put() and get() operations are done under the protection of `zbus_list_lock`, and if the operation results in a modification of `zpci_bus_list`, this modification is done in the same critical section (apart the very first initialization). This way objects are never seen on the list that are about to be released and/or half-initialized. Fixes: 14c87ba ("s390/pci: separate zbus registration from scanning") Suggested-by: Niklas Schnelle <schnelle@linux.ibm.com> Signed-off-by: Benjamin Block <bblock@linux.ibm.com> Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com> Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
1 parent b1aa01d commit 4cb92fa

4 files changed

Lines changed: 91 additions & 29 deletions

File tree

.clang-format

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,7 @@ ForEachMacros:
748748
- 'ynl_attr_for_each_nested'
749749
- 'ynl_attr_for_each_payload'
750750
- 'zorro_for_each_dev'
751+
- 'zpci_bus_for_each'
751752

752753
IncludeBlocks: Preserve
753754
IncludeCategories:

arch/s390/pci/pci.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1148,6 +1148,7 @@ static void zpci_add_devices(struct list_head *scan_list)
11481148

11491149
int zpci_scan_devices(void)
11501150
{
1151+
struct zpci_bus *zbus;
11511152
LIST_HEAD(scan_list);
11521153
int rc;
11531154

@@ -1156,7 +1157,10 @@ int zpci_scan_devices(void)
11561157
return rc;
11571158

11581159
zpci_add_devices(&scan_list);
1159-
zpci_bus_scan_busses();
1160+
zpci_bus_for_each(zbus) {
1161+
zpci_bus_scan_bus(zbus);
1162+
cond_resched();
1163+
}
11601164
return 0;
11611165
}
11621166

arch/s390/pci/pci_bus.c

Lines changed: 71 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -153,23 +153,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus)
153153
return ret;
154154
}
155155

156-
/* zpci_bus_scan_busses - Scan all registered busses
157-
*
158-
* Scan all available zbusses
159-
*
160-
*/
161-
void zpci_bus_scan_busses(void)
162-
{
163-
struct zpci_bus *zbus = NULL;
164-
165-
mutex_lock(&zbus_list_lock);
166-
list_for_each_entry(zbus, &zbus_list, bus_next) {
167-
zpci_bus_scan_bus(zbus);
168-
cond_resched();
169-
}
170-
mutex_unlock(&zbus_list_lock);
171-
}
172-
173156
static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
174157
{
175158
return !s390_pci_no_rid && zdev->rid_available &&
@@ -222,10 +205,29 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, s
222205
return -ENOMEM;
223206
}
224207

225-
static void zpci_bus_release(struct kref *kref)
208+
/**
209+
* zpci_bus_release - Un-initialize resources associated with the zbus and
210+
* free memory
211+
* @kref: refcount * that is part of struct zpci_bus
212+
*
213+
* MUST be called with `zbus_list_lock` held, but the lock is released during
214+
* run of the function.
215+
*/
216+
static inline void zpci_bus_release(struct kref *kref)
217+
__releases(&zbus_list_lock)
226218
{
227219
struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
228220

221+
lockdep_assert_held(&zbus_list_lock);
222+
223+
list_del(&zbus->bus_next);
224+
mutex_unlock(&zbus_list_lock);
225+
226+
/*
227+
* At this point no-one should see this object, or be able to get a new
228+
* reference to it.
229+
*/
230+
229231
if (zbus->bus) {
230232
pci_lock_rescan_remove();
231233
pci_stop_root_bus(zbus->bus);
@@ -237,16 +239,19 @@ static void zpci_bus_release(struct kref *kref)
237239
pci_unlock_rescan_remove();
238240
}
239241

240-
mutex_lock(&zbus_list_lock);
241-
list_del(&zbus->bus_next);
242-
mutex_unlock(&zbus_list_lock);
243242
zpci_remove_parent_msi_domain(zbus);
244243
kfree(zbus);
245244
}
246245

247-
static void zpci_bus_put(struct zpci_bus *zbus)
246+
static inline void __zpci_bus_get(struct zpci_bus *zbus)
247+
{
248+
lockdep_assert_held(&zbus_list_lock);
249+
kref_get(&zbus->kref);
250+
}
251+
252+
static inline void zpci_bus_put(struct zpci_bus *zbus)
248253
{
249-
kref_put(&zbus->kref, zpci_bus_release);
254+
kref_put_mutex(&zbus->kref, zpci_bus_release, &zbus_list_lock);
250255
}
251256

252257
static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
@@ -258,7 +263,7 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
258263
if (!zbus->multifunction)
259264
continue;
260265
if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) {
261-
kref_get(&zbus->kref);
266+
__zpci_bus_get(zbus);
262267
goto out_unlock;
263268
}
264269
}
@@ -268,6 +273,44 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
268273
return zbus;
269274
}
270275

276+
/**
277+
* zpci_bus_get_next - get the next zbus object from given position in the list
278+
* @pos: current position/cursor in the global zbus list
279+
*
280+
* Acquires and releases references as the cursor iterates (might also free/
281+
* release the cursor). Is tolerant of concurrent operations on the list.
282+
*
283+
* To begin the iteration, set *@pos to %NULL before calling the function.
284+
*
285+
* *@pos is set to %NULL in cases where either the list is empty, or *@pos is
286+
* the last element in the list.
287+
*
288+
* Context: Process context. May sleep.
289+
*/
290+
void zpci_bus_get_next(struct zpci_bus **pos)
291+
{
292+
struct zpci_bus *curp = *pos, *next = NULL;
293+
294+
mutex_lock(&zbus_list_lock);
295+
if (curp)
296+
next = list_next_entry(curp, bus_next);
297+
else
298+
next = list_first_entry(&zbus_list, typeof(*curp), bus_next);
299+
300+
if (list_entry_is_head(next, &zbus_list, bus_next))
301+
next = NULL;
302+
303+
if (next)
304+
__zpci_bus_get(next);
305+
306+
*pos = next;
307+
mutex_unlock(&zbus_list_lock);
308+
309+
/* zpci_bus_put() might drop refcount to 0 and locks zbus_list_lock */
310+
if (curp)
311+
zpci_bus_put(curp);
312+
}
313+
271314
static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
272315
{
273316
struct zpci_bus *zbus;
@@ -279,9 +322,6 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
279322
zbus->topo = topo;
280323
zbus->topo_is_tid = topo_is_tid;
281324
INIT_LIST_HEAD(&zbus->bus_next);
282-
mutex_lock(&zbus_list_lock);
283-
list_add_tail(&zbus->bus_next, &zbus_list);
284-
mutex_unlock(&zbus_list_lock);
285325

286326
kref_init(&zbus->kref);
287327
INIT_LIST_HEAD(&zbus->resources);
@@ -291,6 +331,10 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
291331
zbus->bus_resource.flags = IORESOURCE_BUS;
292332
pci_add_resource(&zbus->resources, &zbus->bus_resource);
293333

334+
mutex_lock(&zbus_list_lock);
335+
list_add_tail(&zbus->bus_next, &zbus_list);
336+
mutex_unlock(&zbus_list_lock);
337+
294338
return zbus;
295339
}
296340

arch/s390/pci/pci_bus.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,20 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
1515
void zpci_bus_device_unregister(struct zpci_dev *zdev);
1616

1717
int zpci_bus_scan_bus(struct zpci_bus *zbus);
18-
void zpci_bus_scan_busses(void);
18+
void zpci_bus_get_next(struct zpci_bus **pos);
19+
20+
/**
21+
* zpci_bus_for_each - iterate over all the registered zbus objects
22+
* @pos: a struct zpci_bus * as cursor
23+
*
24+
* Acquires and releases references as the cursor iterates over the registered
25+
* objects. Is tolerant against concurrent removals of objects.
26+
*
27+
* Context: Process context. May sleep.
28+
*/
29+
#define zpci_bus_for_each(pos) \
30+
for ((pos) = NULL, zpci_bus_get_next(&(pos)); (pos) != NULL; \
31+
zpci_bus_get_next(&(pos)))
1932

2033
int zpci_bus_scan_device(struct zpci_dev *zdev);
2134
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);

0 commit comments

Comments
 (0)