Skip to content

Commit 60cf863

Browse files
Ming Lei authored and axboe committed
selftests/ublk: add test for async partition scan
Add test_generic_15.sh to verify that async partition scan prevents IO hang when reading partition tables.

The test creates ublk devices with the fault_inject target and a very large delay (60s) to simulate blocked partition table reads, then kills the daemon to verify proper state transitions without hanging:

1. Without recovery support:
   - Create device with fault_inject and 60s delay
   - Kill daemon while partition scan may be blocked
   - Verify device transitions to DEAD state

2. With recovery support (-r 1):
   - Create device with fault_inject, 60s delay, and recovery
   - Kill daemon while partition scan may be blocked
   - Verify device transitions to QUIESCED state

Before the async partition scan fix, killing the daemon during partition scan would cause a deadlock, because partition scan held ub->mutex while waiting for IO. With the async fix, partition scan happens in a work function and flush_work() ensures proper synchronization.

Add the _add_ublk_dev_no_settle() helper function to skip udevadm settle, which would otherwise hang waiting for partition scan events to complete when the partition table read is delayed.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 7fc4da6 commit 60cf863

3 files changed

Lines changed: 81 additions & 4 deletions

File tree

tools/testing/selftests/ublk/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ TEST_PROGS += test_generic_11.sh
2222
TEST_PROGS += test_generic_12.sh
2323
TEST_PROGS += test_generic_13.sh
2424
TEST_PROGS += test_generic_14.sh
25+
TEST_PROGS += test_generic_15.sh
2526

2627
TEST_PROGS += test_null_01.sh
2728
TEST_PROGS += test_null_02.sh

tools/testing/selftests/ublk/test_common.sh

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,9 @@ _have_feature()
178178
_create_ublk_dev() {
179179
local dev_id;
180180
local cmd=$1
181+
local settle=$2
181182

182-
shift 1
183+
shift 2
183184

184185
if [ ! -c /dev/ublk-control ]; then
185186
return ${UBLK_SKIP_CODE}
@@ -194,7 +195,10 @@ _create_ublk_dev() {
194195
echo "fail to add ublk dev $*"
195196
return 255
196197
fi
197-
udevadm settle
198+
199+
if [ "$settle" = "yes" ]; then
200+
udevadm settle
201+
fi
198202

199203
if [[ "$dev_id" =~ ^[0-9]+$ ]]; then
200204
echo "${dev_id}"
@@ -204,14 +208,18 @@ _create_ublk_dev() {
204208
}
205209

206210
_add_ublk_dev() {
207-
_create_ublk_dev "add" "$@"
211+
_create_ublk_dev "add" "yes" "$@"
212+
}
213+
214+
# Add a ublk device without waiting for udev afterwards.
#
# Every argument is forwarded unchanged to _create_ublk_dev, with "no"
# passed as its settle argument so that "udevadm settle" is skipped.
_add_ublk_dev_no_settle() {
	_create_ublk_dev "add" "no" "$@"
}
209217

210218
_recover_ublk_dev() {
211219
local dev_id
212220
local state
213221

214-
dev_id=$(_create_ublk_dev "recover" "$@")
222+
dev_id=$(_create_ublk_dev "recover" "yes" "$@")
215223
for ((j=0;j<20;j++)); do
216224
state=$(_get_ublk_dev_state "${dev_id}")
217225
[ "$state" == "LIVE" ] && break
tools/testing/selftests/ublk/test_generic_15.sh

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#!/bin/bash
2+
# SPDX-License-Identifier: GPL-2.0
3+
4+
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
5+
6+
TID="generic_15"
7+
ERR_CODE=0
8+
9+
# Create a fault_inject ublk device whose IO is delayed by 60 seconds, kill
# its daemon while the partition scan may still be blocked on that IO, and
# check that the device ends up in the expected state.
#
# Arguments:
#   $1 - "yes" to create the device with recovery enabled (-r 1); any other
#        value creates it without recovery
#   $2 - state the device is expected to report once the daemon is killed
# Globals:
#   TID (read), UBLK_PROG (read), ERR_CODE (set to 255 on a state mismatch)
# Outputs:
#   progress and PASS/FAIL messages on stdout
_test_partition_scan_no_hang()
{
	local recovery_flag=$1
	local expected_state=$2
	local dev_id
	local state
	local start_time
	local elapsed
	# Create ublk device with fault_inject target and very large delay
	# to simulate hang during partition table read
	# --delay_us 60000000 = 60 seconds delay
	local -a add_args=(-t fault_inject -q 1 -d 1 --delay_us 60000000)

	if [ "$recovery_flag" = "yes" ]; then
		echo "Testing partition scan with recovery support..."
		add_args+=(-r 1)
	else
		echo "Testing partition scan without recovery..."
	fi

	# Use _add_ublk_dev_no_settle to avoid udevadm settle hang waiting
	# for partition scan events to complete
	dev_id=$(_add_ublk_dev_no_settle "${add_args[@]}")
	# Must directly follow the assignment so $? is the add command's status.
	_check_add_dev "$TID" $?

	# The add command should return quickly because partition scan is async.
	# Now sleep briefly to let the async partition scan work start and hit
	# the delay in the fault_inject handler.
	sleep 1

	# Kill the ublk daemon while partition scan is potentially blocked
	# and check that the state transitions properly. __ublk_kill_daemon is
	# given the device id, so no separate daemon pid lookup is needed here.
	start_time=${SECONDS}
	state=$(__ublk_kill_daemon "${dev_id}" "${expected_state}")
	elapsed=$((SECONDS - start_time))

	# Verify the device transitioned to expected state
	if [ "$state" != "${expected_state}" ]; then
		echo "FAIL: Device state is $state, expected ${expected_state}"
		ERR_CODE=255
	else
		echo "PASS: Device transitioned to ${expected_state} in ${elapsed}s without hanging"
	fi

	# Clean up the device on both the pass and the fail path.
	# UBLK_PROG is intentionally left unquoted, as in the rest of the suite.
	${UBLK_PROG} del -n "${dev_id}" > /dev/null 2>&1
}
58+
59+
# Run both partition-scan scenarios, then report the accumulated result.
_prep_test "partition_scan" "verify async partition scan prevents IO hang"

# Test 1: Without recovery support - should transition to DEAD
_test_partition_scan_no_hang "no" "DEAD"

# Test 2: With recovery support - should transition to QUIESCED
_test_partition_scan_no_hang "yes" "QUIESCED"

_cleanup_test "partition_scan"
# Quote the expansions so each is always passed as exactly one argument.
_show_result "$TID" "$ERR_CODE"

0 commit comments

Comments
 (0)