Skip to content

Commit 8409f67

Browse files
committed
afs: Adjust the fileserver rotation algorithm to reprobe/retry more quickly
Adjust the fileserver rotation algorithm so that, once every address on a server has been tried (cumulatively, over multiple operations) and no untried addresses remain, we immediately reprobe all of that server's interfaces and retry the op at least once before moving on to the next server.

Signed-off-by: David Howells <dhowells@redhat.com>
1 parent 32275d3 commit 8409f67

3 files changed

Lines changed: 88 additions & 12 deletions

File tree

fs/afs/fs_probe.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,18 @@ static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server
338338
afs_put_server(net, server, afs_server_trace_put_probe);
339339
}
340340

341+
/*
342+
* Probe a server immediately without waiting for its due time to come
343+
* round. This is used when all of the addresses have been tried.
*
* @net:    namespace whose fs_lock seqlock is taken for writing here.
* @server: server to probe; it is only dispatched if server->probe_link is
*          currently linked on a list.
*
* If probe_link is not on any list, nothing is dispatched and the lock is
* simply dropped again.
344+
*/
345+
void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
346+
{
347+
write_seqlock(&net->fs_lock);
348+
/* NOTE(review): the dispatch path below leaves this function without a
 * matching write_sequnlock() here; presumably afs_dispatch_fs_probe()
 * releases net->fs_lock itself - confirm against its definition.  The
 * trailing 'true' presumably requests a probe of all addresses - confirm.
 */
if (!list_empty(&server->probe_link))
349+
return afs_dispatch_fs_probe(net, server, true);
350+
/* Nothing was dispatched, so drop the lock ourselves. */
write_sequnlock(&net->fs_lock);
351+
}
352+
341353
/*
342354
* Probe dispatcher to regularly dispatch probes to keep NAT alive.
343355
*/
@@ -411,3 +423,38 @@ void afs_fs_probe_dispatcher(struct work_struct *work)
411423
_leave(" [quiesce]");
412424
}
413425
}
426+
427+
/*
428+
* Wait for a probe on a particular fileserver to complete for 2s.
*
* @server:  server whose probe state (probe_outstanding, probe.responded)
*           is examined and whose probe_wq is slept upon.
* @is_intr: if true, sleep in TASK_INTERRUPTIBLE and stop waiting when a
*           signal is pending; otherwise sleep in TASK_UNINTERRUPTIBLE.
*
* Returns, checked in this order: 0 if server->probe.responded is set,
* -ERESTARTSYS if is_intr and a signal is pending, -ETIME if the timeout
* counter has reached zero, and -EDESTADDRREQ otherwise.
429+
*/
430+
int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
431+
{
432+
struct wait_queue_entry wait;
433+
unsigned long timo = 2 * HZ; /* remaining wait budget, in jiffies */
434+
435+
/* No probes counted as outstanding: skip the sleep entirely. */
if (atomic_read(&server->probe_outstanding) == 0)
436+
goto dont_wait;
437+
438+
init_wait_entry(&wait, 0);
439+
for (;;) {
440+
/* Queue on probe_wq and set the task state before testing the exit
 * conditions.
 */
prepare_to_wait_event(&server->probe_wq, &wait,
441+
is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
442+
/* Stop on: budget exhausted, a response recorded, no probes left
 * outstanding, or (interruptible waits only) a pending signal.
 */
if (timo == 0 ||
443+
server->probe.responded ||
444+
atomic_read(&server->probe_outstanding) == 0 ||
445+
(is_intr && signal_pending(current)))
446+
break;
447+
/* Sleep; carry forward whatever schedule_timeout() reports as left. */
timo = schedule_timeout(timo);
448+
}
449+
450+
finish_wait(&server->probe_wq, &wait);
451+
452+
/* When reached via the early goto, timo is still 2 * HZ, so the -ETIME
 * branch below cannot be taken on the no-wait path.
 */
dont_wait:
453+
if (server->probe.responded)
454+
return 0;
455+
if (is_intr && signal_pending(current))
456+
return -ERESTARTSYS;
457+
if (timo == 0)
458+
return -ETIME;
459+
/* No response, no signal, and time left over. */
return -EDESTADDRREQ;
460+
}

fs/afs/internal.h

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -826,16 +826,18 @@ struct afs_operation {
826826
unsigned short nr_iterations; /* Number of server iterations */
827827

828828
unsigned int flags;
829-
#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */
830-
#define AFS_OPERATION_VBUSY 0x0002 /* Set if seen VBUSY */
831-
#define AFS_OPERATION_VMOVED 0x0004 /* Set if seen VMOVED */
832-
#define AFS_OPERATION_VNOVOL 0x0008 /* Set if seen VNOVOL */
833-
#define AFS_OPERATION_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */
834-
#define AFS_OPERATION_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
835-
#define AFS_OPERATION_UNINTR 0x0040 /* Set if op is uninterruptible */
836-
#define AFS_OPERATION_DOWNGRADE 0x0080 /* Set to retry with downgraded opcode */
837-
#define AFS_OPERATION_LOCK_0 0x0100 /* Set if have io_lock on file[0] */
838-
#define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */
829+
#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */
830+
#define AFS_OPERATION_VBUSY 0x0002 /* Set if seen VBUSY */
831+
#define AFS_OPERATION_VMOVED 0x0004 /* Set if seen VMOVED */
832+
#define AFS_OPERATION_VNOVOL 0x0008 /* Set if seen VNOVOL */
833+
#define AFS_OPERATION_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */
834+
#define AFS_OPERATION_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
835+
#define AFS_OPERATION_UNINTR 0x0040 /* Set if op is uninterruptible */
836+
#define AFS_OPERATION_DOWNGRADE 0x0080 /* Set to retry with downgraded opcode */
837+
#define AFS_OPERATION_LOCK_0 0x0100 /* Set if have io_lock on file[0] */
838+
#define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */
839+
#define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */
840+
#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */
839841
};
840842

841843
/*
@@ -1055,7 +1057,9 @@ static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n,
10551057
extern void afs_fileserver_probe_result(struct afs_call *);
10561058
extern void afs_fs_probe_fileserver(struct afs_net *, struct afs_server *, struct key *, bool);
10571059
extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
1060+
extern void afs_probe_fileserver(struct afs_net *, struct afs_server *);
10581061
extern void afs_fs_probe_dispatcher(struct work_struct *);
1062+
extern int afs_wait_for_one_fs_probe(struct afs_server *, bool);
10591063

10601064
/*
10611065
* inode.c

fs/afs/rotate.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ bool afs_select_fileserver(struct afs_operation *op)
369369

370370
_debug("USING SERVER: %pU", &server->uuid);
371371

372+
op->flags |= AFS_OPERATION_RETRY_SERVER;
372373
op->server = server;
373374
if (vnode->cb_server != server) {
374375
vnode->cb_server = server;
@@ -383,6 +384,7 @@ bool afs_select_fileserver(struct afs_operation *op)
383384
afs_get_addrlist(alist);
384385
read_unlock(&server->fs_lock);
385386

387+
retry_server:
386388
memset(&op->ac, 0, sizeof(op->ac));
387389

388390
if (!op->ac.alist)
@@ -398,13 +400,36 @@ bool afs_select_fileserver(struct afs_operation *op)
398400
* address on which it will respond to us.
399401
*/
400402
if (!afs_iterate_addresses(&op->ac))
401-
goto next_server;
403+
goto out_of_addresses;
402404

403-
_debug("address [%u] %u/%u", op->index, op->ac.index, op->ac.alist->nr_addrs);
405+
_debug("address [%u] %u/%u %pISp",
406+
op->index, op->ac.index, op->ac.alist->nr_addrs,
407+
&op->ac.alist->addrs[op->ac.index].transport);
404408

405409
_leave(" = t");
406410
return true;
407411

412+
out_of_addresses:
413+
/* We've now had a failure to respond on all of a server's addresses -
414+
* immediately probe them again and consider retrying the server.
415+
*/
416+
afs_probe_fileserver(op->net, op->server);
417+
if (op->flags & AFS_OPERATION_RETRY_SERVER) {
418+
alist = op->ac.alist;
419+
error = afs_wait_for_one_fs_probe(
420+
op->server, !(op->flags & AFS_OPERATION_UNINTR));
421+
switch (error) {
422+
case 0:
423+
op->flags &= ~AFS_OPERATION_RETRY_SERVER;
424+
goto retry_server;
425+
case -ERESTARTSYS:
426+
goto failed_set_error;
427+
case -ETIME:
428+
case -EDESTADDRREQ:
429+
goto next_server;
430+
}
431+
}
432+
408433
next_server:
409434
_debug("next");
410435
afs_end_cursor(&op->ac);

0 commit comments

Comments
 (0)