afs: Retry rxrpc calls with address rotation on network error

When a network error occurs when we attempt a call, we want to rotate the
set of addresses we have for that peer and try the call again.  Use the new
AF_RXRPC call-retrying facility to do this, thereby avoiding the need to
re-encrypt each time as this allows us to reuse the Tx-queue from the dead
call.

This method will work for accessing alternate VL servers and the various
addresses available for a single FS server, but should not be used to go to
alternate FS servers since that has other implications (such as getting
callbacks on other servers).

To this end:

 (1) An 'address list' concept is introduced.  Address lists are RCU
     replaceable lists of addresses.

 (2) A cell's VL server address list can be loaded directly via insmod or
     echo to /proc/fs/afs/cells or dynamically from a DNS query for AFSDB
     or SRV records.

 (3) An FS server's address list, for the moment, has a single entry that
     is the key to the server list.  This will change in the future when a
     server is instead keyed on its UUID and the VL.GetAddrsU operation is
     used.

 (4) Anyone wanting to use a cell's VL server address must wait until the
     cell record comes online and has tried to obtain some addresses.

 (5) An 'address cursor' concept is introduced to handle stepping over the
     address list.  For client calls, this is driven from a wrapper around
     rxrpc_kernel_send_data().  It isn't used for CM service call replies as
     they have to go to the caller's address.

In the future, we might want to annotate the list with information about
how each address fares.  We might then want to propagate such annotations
over address list replacement.

Whilst we're at it, we allow IPv6 addresses to be specified in
colon-delimited lists by enclosing them in square brackets.

Signed-off-by: David Howells <dhowells@redhat.com>
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 095c541..7cb4d55 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -6,6 +6,7 @@
 
 kafs-objs := \
 	$(afs-cache-y) \
+	addr_list.o \
 	callback.o \
 	cell.o \
 	cmservice.o \
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
new file mode 100644
index 0000000..4de7b07
--- /dev/null
+++ b/fs/afs/addr_list.c
@@ -0,0 +1,306 @@
+/* Server address list management
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/dns_resolver.h>
+#include <linux/inet.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+
+#define AFS_MAX_ADDRESSES \
+	((PAGE_SIZE - sizeof(struct afs_addr_list)) / sizeof(struct sockaddr_rxrpc))
+
+/*
+ * Drop a ref on an address list (NULL allowed); the last put frees it via RCU.
+ */
+void afs_put_addrlist(struct afs_addr_list *alist)
+{
+	if (alist && refcount_dec_and_test(&alist->usage))
+		kfree_rcu(alist, rcu);	/* No function-pointer cast needed */
+}
+
+/*
+ * Allocate a list of nr slots preset to service/port; nr_addrs starts at 0.
+ */
+static struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
+						unsigned short service,
+						unsigned short port)
+{
+	struct afs_addr_list *alist;
+	unsigned int i;
+
+	_enter("%u,%u,%u", nr, service, port);
+
+	alist = kzalloc(sizeof(*alist) + sizeof(alist->addrs[0]) * nr,
+			GFP_KERNEL);
+	if (!alist)
+		return NULL;
+
+	refcount_set(&alist->usage, 1);	/* The caller gets the initial ref */
+
+	for (i = 0; i < nr; i++) {
+		struct sockaddr_rxrpc *srx = &alist->addrs[i];
+		srx->srx_family			= AF_RXRPC;
+		srx->srx_service		= service;
+		srx->transport_type		= SOCK_DGRAM;
+		srx->transport_len		= sizeof(srx->transport.sin6);
+		srx->transport.sin6.sin6_family	= AF_INET6;
+		srx->transport.sin6.sin6_port	= htons(port);
+	}
+
+	return alist;
+}
+
+/*
+ * Parse delimited IPv4/IPv6 addresses; each may be [bracketed] and take +port.
+ */
+struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
+					   char delim,
+					   unsigned short service,
+					   unsigned short port)
+{
+	struct afs_addr_list *alist;
+	const char *p, *end = text + len;
+	unsigned int nr = 0;
+
+	_enter("%*.*s,%c", (int)len, (int)len, text, delim);
+
+	if (!len)
+		return ERR_PTR(-EDESTADDRREQ);
+
+	if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
+		delim = ',';
+
+	/* Count the addresses.  p is stepped in the loop condition so that the
+	 * 'continue' taken on an empty entry still makes forward progress.
+	 */
+	p = text;
+	do {
+		if (!*p)
+			return ERR_PTR(-EINVAL);
+		if (*p == delim)
+			continue;
+		nr++;
+		if (*p == '[') {
+			p++;
+			if (p == end)
+				return ERR_PTR(-EINVAL);
+			p = memchr(p, ']', end - p);
+			if (!p)
+				return ERR_PTR(-EINVAL);
+			p++;
+			if (p >= end)
+				break;
+		}
+		p = memchr(p, delim, end - p);
+		if (!p)
+			break;
+	} while (p++, p < end);
+
+	_debug("%u/%lu addresses", nr, AFS_MAX_ADDRESSES);
+	if (nr > AFS_MAX_ADDRESSES)
+		nr = AFS_MAX_ADDRESSES;
+
+	alist = afs_alloc_addrlist(nr, service, port);
+	if (!alist)
+		return ERR_PTR(-ENOMEM);
+
+	/* Extract the addresses */
+	p = text;
+	do {
+		struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
+		char tdelim = delim;
+
+		if (*p == delim) {
+			p++;
+			continue;
+		}
+
+		if (*p == '[') {
+			p++;
+			tdelim = ']';
+		}
+
+		if (in4_pton(p, end - p,
+			     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
+			     tdelim, &p)) {
+			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
+			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
+			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+		} else if (in6_pton(p, end - p,
+				    srx->transport.sin6.sin6_addr.s6_addr,
+				    tdelim, &p)) {
+			/* Nothing to do */
+		} else {
+			goto bad_address;
+		}
+
+		if (tdelim == ']') {
+			if (p == end || *p != ']')
+				goto bad_address;
+			p++;
+		}
+
+		if (p < end) {
+			if (*p == '+') {
+				/* Port number specification "+1234" */
+				unsigned int xport = 0;
+				p++;
+				if (p >= end || !isdigit(*p))
+					goto bad_address;
+				do {
+					xport *= 10;
+					xport += *p - '0';
+					if (xport > 65535)
+						goto bad_address;
+					p++;
+				} while (p < end && isdigit(*p));
+				srx->transport.sin6.sin6_port = htons(xport);
+			} else if (*p == delim) {
+				p++;
+			} else {
+				goto bad_address;
+			}
+		}
+
+		alist->nr_addrs++;
+	} while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES);
+
+	_leave(" = [nr %u]", alist->nr_addrs);
+	return alist;
+
+bad_address:
+	kfree(alist);
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Compare old and new address lists to see if there's been any change.
+ * - How to do this in better than O(Nlog(N)) time?
+ *   - We don't really want to sort the address list, but would rather take the
+ *     list as we got it so as not to undo record rotation by the DNS server.
+ */
+#if 0
+static int afs_cmp_addr_list(const struct afs_addr_list *a1,
+			     const struct afs_addr_list *a2)
+{
+}
+#endif
+
+/*
+ * Perform a DNS query for VL servers and build up an address list.
+ */
+struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
+{
+	struct afs_addr_list *alist;
+	char *vllist = NULL;
+	int ret;
+
+	_enter("%s", cell->name);
+
+	ret = dns_query("afsdb", cell->name, cell->name_len,
+			"ipv4", &vllist, _expiry);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
+				     VL_SERVICE, AFS_VL_PORT);
+	if (IS_ERR(alist)) {
+		kfree(vllist);
+		if (alist != ERR_PTR(-ENOMEM))
+			pr_err("Failed to parse DNS data\n");
+		return alist;
+	}
+
+	kfree(vllist);
+	return alist;
+}
+
+/*
+ * Advance the cursor and return the next address, or an error once exhausted.
+ */
+struct sockaddr_rxrpc *afs_get_address(struct afs_addr_cursor *ac)
+{
+	unsigned short index;
+
+	_enter("%hu+%hd", ac->start, (short)ac->index);
+
+	if (!ac->alist)
+		return ERR_PTR(ac->error);	/* No list was attached to the cursor */
+
+	ac->index++;
+	if (ac->index == ac->alist->nr_addrs)
+		return ERR_PTR(-EDESTADDRREQ);	/* Every address has been offered */
+
+	index = ac->start + ac->index;
+	if (index >= ac->alist->nr_addrs)
+		index -= ac->alist->nr_addrs;	/* Wrap around the end of addrs[] */
+
+	return &ac->alist->addrs[index];
+}
+
+/*
+ * Finish with an address cursor, dropping its ref on the address list.
+ */
+void afs_end_cursor(struct afs_addr_cursor *ac)
+{
+	afs_put_addrlist(ac->alist);
+}
+
+/*
+ * Set the call's cursor to the cell's VL address list, waiting for a lookup.
+ */
+void afs_set_vl_cursor(struct afs_call *call, struct afs_cell *cell)
+{
+	struct afs_addr_cursor *ac = &call->cursor;
+	struct afs_addr_list *alist;
+	int ret;
+
+	if (!rcu_access_pointer(cell->vl_addrs)) {
+		ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
+				  TASK_INTERRUPTIBLE);
+		if (ret < 0) {
+			ac->error = ret;
+			return;
+		}
+
+		if (!rcu_access_pointer(cell->vl_addrs) &&
+		    ktime_get_real_seconds() < cell->dns_expiry) {
+			ac->error = cell->error;
+			return;
+		}
+	}
+
+	read_lock(&cell->vl_addrs_lock);
+	alist = rcu_dereference_protected(cell->vl_addrs,
+					  lockdep_is_held(&cell->vl_addrs_lock));
+	afs_get_addrlist(alist);
+	read_unlock(&cell->vl_addrs_lock);
+
+	ac->alist = alist;
+	ac->start = alist->index;
+	ac->index = 0xffff;	/* Wraps to 0 on the first afs_get_address() */
+	ac->error = 0;
+}
+
+/*
+ * Set the call's cursor to iterate over an FS server's address list.
+ */
+void afs_set_fs_cursor(struct afs_call *call, struct afs_server *server)
+{
+	struct afs_addr_cursor *ac = &call->cursor;
+
+	ac->alist = afs_get_addrlist(server->addrs);
+	ac->start = ac->alist->index;
+	ac->index = 0xffff;	/* Wraps to 0 on the first afs_get_address() */
+	ac->error = 0;
+}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index ab042d8..c8033a4 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -9,7 +9,6 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/key.h>
 #include <linux/ctype.h>
@@ -152,68 +151,33 @@
 	init_rwsem(&cell->vl_sem);
 	INIT_LIST_HEAD(&cell->vl_list);
 	spin_lock_init(&cell->vl_lock);
-	seqlock_init(&cell->vl_addrs_lock);
-	cell->flags = (1 << AFS_CELL_FL_NOT_READY);
-
-	for (i = 0; i < AFS_CELL_MAX_ADDRS; i++) {
-		struct sockaddr_rxrpc *srx = &cell->vl_addrs[i];
-		srx->srx_family			= AF_RXRPC;
-		srx->srx_service		= VL_SERVICE;
-		srx->transport_type		= SOCK_DGRAM;
-		srx->transport.sin6.sin6_family	= AF_INET6;
-		srx->transport.sin6.sin6_port	= htons(AFS_VL_PORT);
-	}
+	cell->flags = ((1 << AFS_CELL_FL_NOT_READY) |
+		       (1 << AFS_CELL_FL_NO_LOOKUP_YET));
+	rwlock_init(&cell->vl_addrs_lock);
 
 	/* Fill in the VL server list if we were given a list of addresses to
 	 * use.
 	 */
 	if (vllist) {
-		char delim = ':';
+		struct afs_addr_list *alist;
 
-		if (strchr(vllist, ',') || !strchr(vllist, '.'))
-			delim = ',';
+		alist = afs_parse_text_addrs(vllist, strlen(vllist), ':',
+					     VL_SERVICE, AFS_VL_PORT);
+		if (IS_ERR(alist)) {
+			ret = PTR_ERR(alist);
+			goto parse_failed;
+		}
 
-		do {
-			struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
-
-			if (in4_pton(vllist, -1,
-				     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
-				     delim, &vllist)) {
-				srx->transport_len = sizeof(struct sockaddr_in6);
-				srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
-				srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
-				srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-			} else if (in6_pton(vllist, -1,
-					    srx->transport.sin6.sin6_addr.s6_addr,
-					    delim, &vllist)) {
-				srx->transport_len = sizeof(struct sockaddr_in6);
-				srx->transport.sin6.sin6_family	= AF_INET6;
-			} else {
-				goto bad_address;
-			}
-
-			cell->vl_naddrs++;
-			if (!*vllist)
-				break;
-			vllist++;
-
-		} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && vllist);
-
-		/* Disable DNS refresh for manually-specified cells */
+		rcu_assign_pointer(cell->vl_addrs, alist);
 		cell->dns_expiry = TIME64_MAX;
-	} else {
-		/* We're going to need to 'refresh' this cell's VL server list
-		 * from the DNS before we can use it.
-		 */
-		cell->dns_expiry = S64_MIN;
 	}
 
 	_leave(" = %p", cell);
 	return cell;
 
-bad_address:
-	printk(KERN_ERR "kAFS: bad VL server IP address\n");
-	ret = -EINVAL;
+parse_failed:
+	if (ret == -EINVAL)
+		printk(KERN_ERR "kAFS: bad VL server IP address\n");
 	kfree(cell);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
@@ -325,7 +289,6 @@
 	if (excl) {
 		ret = -EEXIST;
 	} else {
-		ASSERTCMP(atomic_read(&cursor->usage), >=, 1);
 		afs_get_cell(cursor);
 		ret = 0;
 	}
@@ -333,8 +296,10 @@
 	kfree(candidate);
 	if (ret == 0)
 		goto wait_for_cell;
+	goto error_noput;
 error:
 	afs_put_cell(net, cell);
+error_noput:
 	_leave(" = %d [error]", ret);
 	return ERR_PTR(ret);
 }
@@ -396,78 +361,50 @@
  */
 static void afs_update_cell(struct afs_cell *cell)
 {
+	struct afs_addr_list *alist, *old;
 	time64_t now, expiry;
-	char *vllist = NULL;
-	int ret;
 
 	_enter("%s", cell->name);
 
-	ret = dns_query("afsdb", cell->name, cell->name_len,
-			"ipv4", &vllist, &expiry);
-	_debug("query %d", ret);
-	switch (ret) {
-	case 0 ... INT_MAX:
-		clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-		clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
-		goto parse_dns_data;
+	alist = afs_dns_query(cell, &expiry);
+	if (IS_ERR(alist)) {
+		switch (PTR_ERR(alist)) {
+		case -ENODATA:
+			/* The DNS said that the cell does not exist */
+			set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
+			clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+			cell->dns_expiry = ktime_get_real_seconds() + 61;
+			break;
 
-	case -ENODATA:
-		clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-		set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
-		cell->dns_expiry = ktime_get_real_seconds() + 61;
-		cell->error = -EDESTADDRREQ;
-		goto out;
-
-	case -EAGAIN:
-	case -ECONNREFUSED:
-	default:
-		/* Unable to query DNS. */
-		set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-		cell->dns_expiry = ktime_get_real_seconds() + 10;
-		cell->error = -EDESTADDRREQ;
-		goto out;
-	}
-
-parse_dns_data:
-	write_seqlock(&cell->vl_addrs_lock);
-
-	ret = -EINVAL;
-	do {
-		struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
-
-		if (in4_pton(vllist, -1,
-			     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
-			     ',', (const char **)&vllist)) {
-			srx->transport_len = sizeof(struct sockaddr_in6);
-			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
-			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
-			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-		} else if (in6_pton(vllist, -1,
-				    srx->transport.sin6.sin6_addr.s6_addr,
-				    ',', (const char **)&vllist)) {
-			srx->transport_len = sizeof(struct sockaddr_in6);
-			srx->transport.sin6.sin6_family	= AF_INET6;
-		} else {
-			goto bad_address;
+		case -EAGAIN:
+		case -ECONNREFUSED:
+		default:
+			set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+			cell->dns_expiry = ktime_get_real_seconds() + 10;
+			break;
 		}
 
-		cell->vl_naddrs++;
-		if (!*vllist)
-			break;
-		vllist++;
+		cell->error = -EDESTADDRREQ;
+	} else {
+		clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+		clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
 
-	} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS);
+		/* Exclusion on changing vl_addrs is achieved by a
+		 * non-reentrant work item.
+		 */
+		old = rcu_dereference_protected(cell->vl_addrs, true);
+		rcu_assign_pointer(cell->vl_addrs, alist);
+		cell->dns_expiry = expiry;
 
-	if (cell->vl_naddrs < AFS_CELL_MAX_ADDRS)
-		memset(cell->vl_addrs + cell->vl_naddrs, 0,
-		       (AFS_CELL_MAX_ADDRS - cell->vl_naddrs) * sizeof(cell->vl_addrs[0]));
+		if (old)
+			afs_put_addrlist(old);
+	}
+
+	if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
+		wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET);
 
 	now = ktime_get_real_seconds();
-	cell->dns_expiry = expiry;
-	afs_set_cell_timer(cell->net, expiry - now);
-bad_address:
-	write_sequnlock(&cell->vl_addrs_lock);
-out:
+	afs_set_cell_timer(cell->net, cell->dns_expiry - now);
 	_leave("");
 }
 
@@ -482,6 +419,7 @@
 
 	ASSERTCMP(atomic_read(&cell->usage), ==, 0);
 
+	afs_put_addrlist(cell->vl_addrs);
 	key_put(cell->anonymous_key);
 	kfree(cell);
 
@@ -515,11 +453,21 @@
 }
 
 /*
+ * Get a reference on a cell record.
+ */
+struct afs_cell *afs_get_cell(struct afs_cell *cell)
+{
+	atomic_inc(&cell->usage);
+	return cell;
+}
+
+/*
  * Drop a reference on a cell record.
  */
 void afs_put_cell(struct afs_net *net, struct afs_cell *cell)
 {
 	time64_t now, expire_delay;
+	unsigned int usage;
 
 	if (!cell)
 		return;
@@ -533,7 +481,8 @@
 	    !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
 		expire_delay = afs_cell_gc_delay;
 
-	if (atomic_dec_return(&cell->usage) > 1)
+	usage = atomic_dec_return(&cell->usage);
+	if (usage > 1)
 		return;
 
 	/* 'cell' may now be garbage collected. */
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index bac2e8d..f4e3ec1 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -305,7 +305,8 @@
 	bp[2] = htonl(vnode->fid.vnode);
 	bp[3] = htonl(vnode->fid.unique);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -516,7 +517,8 @@
 	bp[7] = htonl(lower_32_bits(req->len));
 
 	atomic_inc(&req->usage);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -559,7 +561,8 @@
 	bp[5] = htonl(lower_32_bits(req->len));
 
 	atomic_inc(&req->usage);
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -641,7 +644,8 @@
 	ASSERT(ncallbacks > 0);
 	wake_up_nr(&server->cb_break_waitq, ncallbacks);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -736,7 +740,8 @@
 	*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
 	*bp++ = 0; /* segment size */
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -815,7 +820,8 @@
 		bp = (void *) bp + padsz;
 	}
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -899,7 +905,8 @@
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1002,7 +1009,8 @@
 	*bp++ = htonl(S_IRWXUGO); /* unix mode */
 	*bp++ = 0; /* segment size */
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1104,7 +1112,8 @@
 		bp = (void *) bp + n_padsz;
 	}
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1207,7 +1216,8 @@
 	*bp++ = htonl(i_size >> 32);
 	*bp++ = htonl((u32) i_size);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1280,7 +1290,8 @@
 	*bp++ = htonl(size);
 	*bp++ = htonl(i_size);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1380,7 +1391,8 @@
 	*bp++ = htonl(attr->ia_size >> 32);	/* new file length */
 	*bp++ = htonl((u32) attr->ia_size);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1427,7 +1439,8 @@
 	*bp++ = 0;				/* size of write */
 	*bp++ = htonl(attr->ia_size);		/* new file length */
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1468,7 +1481,8 @@
 
 	xdr_encode_AFS_StoreStatus(&bp, attr);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1699,7 +1713,8 @@
 	bp[0] = htonl(FSGETVOLUMESTATUS);
 	bp[1] = htonl(vnode->fid.vid);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1784,7 +1799,8 @@
 	*bp++ = htonl(vnode->fid.unique);
 	*bp++ = htonl(type);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1815,7 +1831,8 @@
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
 
 /*
@@ -1846,5 +1863,6 @@
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, async);
+	afs_set_fs_cursor(call, server);
+	return afs_make_call(call, GFP_NOFS, async);
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 9a7c1e9..5f8067f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -31,6 +31,7 @@
 
 struct pagevec;
 struct afs_call;
+struct afs_addr_cursor;
 
 typedef enum {
 	AFS_VL_NEW,			/* new, uninitialised record */
@@ -66,6 +67,24 @@
 };
 
 /*
+ * List of server addresses.
+ */
+struct afs_addr_list {
+	struct rcu_head		rcu;		/* Must be first */
+	refcount_t		usage;		/* Reference count */
+	unsigned short		nr_addrs;	/* Number of addrs[] slots filled in */
+	unsigned short		index;		/* Address currently in use */
+	struct sockaddr_rxrpc	addrs[];	/* Sized when the list is allocated */
+};
+
+struct afs_addr_cursor {
+	struct afs_addr_list	*alist;		/* List being stepped over (ref held) or NULL */
+	unsigned short		start;		/* Starting point in alist->addrs[] */
+	unsigned short		index;		/* Wrapping offset from start to current addr */
+	short			error;		/* Error returned when alist is NULL */
+};
+
+/*
  * a record of an in-progress RxRPC call
  */
 struct afs_call {
@@ -77,6 +96,7 @@
 	struct key		*key;		/* security for this call */
 	struct afs_net		*net;		/* The network namespace */
 	struct afs_server	*server;	/* server affected by incoming CM call */
+	struct afs_addr_cursor	cursor;		/* Address/server rotation cursor */
 	void			*request;	/* request data (first part) */
 	struct address_space	*mapping;	/* page set */
 	struct afs_writeback	*wb;		/* writeback being performed */
@@ -276,16 +296,15 @@
 #define AFS_CELL_FL_NO_GC	1		/* The cell was added manually, don't auto-gc */
 #define AFS_CELL_FL_NOT_FOUND	2		/* Permanent DNS error */
 #define AFS_CELL_FL_DNS_FAIL	3		/* Failed to access DNS */
+#define AFS_CELL_FL_NO_LOOKUP_YET 4		/* Not completed first DNS lookup yet */
 	enum afs_cell_state	state;
 	short			error;
 
 	spinlock_t		vl_lock;	/* vl_list lock */
 
 	/* VLDB server list. */
-	seqlock_t		vl_addrs_lock;
-	unsigned short		vl_naddrs;	/* number of VL servers in addr list */
-	unsigned short		vl_curr_svix;	/* current server index */
-	struct sockaddr_rxrpc	vl_addrs[AFS_CELL_MAX_ADDRS];	/* cell VL server addresses */
+	rwlock_t		vl_addrs_lock;	/* Lock on vl_addrs */
+	struct afs_addr_list	__rcu *vl_addrs; /* List of VL servers */
 	u8			name_len;	/* Length of name */
 	char			name[64 + 1];	/* Cell name, case-flattened and NUL-padded */
 };
@@ -336,7 +355,7 @@
 struct afs_server {
 	atomic_t		usage;
 	time64_t		time_of_death;	/* time at which put reduced usage to 0 */
-	struct sockaddr_rxrpc	addr;		/* server address */
+	struct afs_addr_list	__rcu *addrs;	/* List of addresses for this server */
 	struct afs_net		*net;		/* The network namespace */
 	struct afs_cell		*cell;		/* cell in which server resides */
 	struct list_head	link;		/* link in cell's server list */
@@ -474,6 +493,23 @@
 
 /*****************************************************************************/
 /*
+ * addr_list.c
+ */
+static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist)
+{
+	refcount_inc(&alist->usage);
+	return alist;
+}
+extern void afs_put_addrlist(struct afs_addr_list *);
+extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char,
+						  unsigned short, unsigned short);
+extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *);
+extern void afs_set_vl_cursor(struct afs_call *, struct afs_cell *);
+extern void afs_set_fs_cursor(struct afs_call *, struct afs_server *);
+extern struct sockaddr_rxrpc *afs_get_address(struct afs_addr_cursor *);
+extern 	void afs_end_cursor(struct afs_addr_cursor *);
+
+/*
  * cache.c
  */
 #ifdef CONFIG_AFS_FSCACHE
@@ -504,11 +540,11 @@
 /*
  * cell.c
  */
-#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
 extern int afs_cell_init(struct afs_net *, const char *);
 extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned);
 extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
 					const char *, bool);
+extern struct afs_cell *afs_get_cell(struct afs_cell *);
 extern void afs_put_cell(struct afs_net *, struct afs_cell *);
 extern void afs_manage_cells(struct work_struct *);
 extern void afs_cells_timer(unsigned long);
@@ -662,7 +698,7 @@
 extern void afs_charge_preallocation(struct work_struct *);
 extern void afs_put_call(struct afs_call *);
 extern int afs_queue_call_work(struct afs_call *);
-extern int afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool);
+extern int afs_make_call(struct afs_call *, gfp_t, bool);
 extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 					    const struct afs_call_type *,
 					    size_t, size_t);
@@ -713,12 +749,10 @@
 /*
  * vlclient.c
  */
-extern int afs_vl_get_entry_by_name(struct afs_net *,
-				    struct sockaddr_rxrpc *, struct key *,
-				    const char *, struct afs_cache_vlocation *,
-				    bool);
-extern int afs_vl_get_entry_by_id(struct afs_net *,
-				  struct sockaddr_rxrpc *, struct key *,
+extern int afs_vl_get_entry_by_name(struct afs_cell *, struct key *,
+				    const char *,
+				    struct afs_cache_vlocation *, bool);
+extern int afs_vl_get_entry_by_id(struct afs_cell *,struct key *,
 				  afs_volid_t, afs_voltype_t,
 				  struct afs_cache_vlocation *, bool);
 
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 0856542..9cf9ce8 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -514,23 +514,23 @@
  */
 static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
 {
+	struct afs_addr_list *alist;
 	struct afs_cell *cell = m->private;
 	loff_t pos = *_pos;
 
-	_enter("cell=%p pos=%Ld", cell, *_pos);
+	rcu_read_lock();
 
-	/* lock the list against modification */
-	down_read(&cell->vl_sem);
+	alist = rcu_dereference(cell->vl_addrs);
 
 	/* allow for the header line */
 	if (!pos)
 		return (void *) 1;
 	pos--;
 
-	if (pos >= cell->vl_naddrs)
+	if (!alist || pos >= alist->nr_addrs)
 		return NULL;
 
-	return &cell->vl_addrs[pos];
+	return alist->addrs + pos;
 }
 
 /*
@@ -539,17 +539,18 @@
 static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
 					  loff_t *_pos)
 {
+	struct afs_addr_list *alist;
 	struct afs_cell *cell = p->private;
 	loff_t pos;
 
-	_enter("cell=%p{nad=%u} pos=%Ld", cell, cell->vl_naddrs, *_pos);
+	alist = rcu_dereference(cell->vl_addrs);
 
 	pos = *_pos;
 	(*_pos)++;
-	if (pos >= cell->vl_naddrs)
+	if (!alist || pos >= alist->nr_addrs)
 		return NULL;
 
-	return &cell->vl_addrs[pos];
+	return alist->addrs + pos;
 }
 
 /*
@@ -557,9 +558,7 @@
  */
 static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
 {
-	struct afs_cell *cell = p->private;
-
-	up_read(&cell->vl_sem);
+	rcu_read_unlock();
 }
 
 /*
@@ -658,7 +657,7 @@
 	}
 
 	/* display one cell per line on subsequent lines */
-	sprintf(ipaddr, "%pISp", &server->addr.transport);
+	sprintf(ipaddr, "%pISp", &server->addrs->addrs[0].transport);
 	seq_printf(m, "%3d %-15s %5d\n",
 		   atomic_read(&server->usage), ipaddr, server->fs_state);
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 805ae05..ab149f6 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -162,6 +162,7 @@
 		if (call->type->destructor)
 			call->type->destructor(call);
 
+		afs_end_cursor(&call->cursor);
 		kfree(call->request);
 		kfree(call);
 
@@ -287,6 +288,84 @@
 }
 
 /*
+ * Send data through rxrpc and rotate the destination address if a network
+ * error of some sort occurs.
+ */
+static int afs_send_data(struct afs_call *call, struct msghdr *msg,
+			 unsigned int bytes)
+{
+	enum rxrpc_call_completion compl;
+	struct sockaddr_rxrpc *srx;
+	int ret;
+
+resume:
+	ret = rxrpc_kernel_send_data(call->net->socket, call->rxcall,
+				     msg, bytes, afs_notify_end_request_tx);
+
+	/* Success and obvious local errors are returned immediately.  Note
+	 * that for an async operation, the call struct may already have
+	 * evaporated.
+	 */
+	if (ret >= 0 ||
+	    ret == -ENOMEM ||
+	    ret == -ENONET ||
+	    ret == -EINTR ||
+	    ret == -EFAULT ||
+	    ret == -ERESTARTSYS ||
+	    ret == -EKEYEXPIRED ||
+	    ret == -EKEYREVOKED ||
+	    ret == -EKEYREJECTED ||
+	    ret == -EPERM)
+		return ret;
+
+	/* Check to see if it's an error that meant the call data packets never
+	 * reached the peer.  A call still in progress has no completion yet.
+	 */
+	call->error = rxrpc_kernel_check_call(call->net->socket, call->rxcall,
+					      &compl, &call->abort_code);
+	if (call->error == -EINPROGRESS)
+		return ret;
+
+	switch (compl) {
+	case RXRPC_CALL_SUCCEEDED:
+	default:
+		WARN_ONCE(true, "AFS: Call succeeded despite send-data failing\n");
+		return 0;
+
+	case RXRPC_CALL_REMOTELY_ABORTED:
+	case RXRPC_CALL_LOCALLY_ABORTED:
+		/* All of these indicate that we had some interaction with the
+		 * server, so there's no point trying another server.
+		 */
+		return call->error;
+
+	case RXRPC_CALL_LOCAL_ERROR:
+	case RXRPC_CALL_NETWORK_ERROR:
+		/* Local errors from an attempt to connect a call and network
+		 * errors reported back by ICMP suggest skipping the current
+		 * address and trying the next.
+		 */
+		break;
+	}
+
+	/* Rotate servers if possible. */
+	srx = afs_get_address(&call->cursor);
+	if (IS_ERR(srx)) {
+		_leave(" = %ld [cursor]", PTR_ERR(srx));
+		return PTR_ERR(srx);
+	}
+
+	ret = rxrpc_kernel_retry_call(call->net->socket, call->rxcall,
+				      srx, call->key);
+	if (ret < 0)
+		return ret;
+
+	if (msg_data_left(msg) > 0)
+		goto resume;
+	return 0;
+}
+
+/*
  * attach the data from a bunch of pages on an inode to a call
  */
 static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
@@ -305,8 +384,7 @@
 		bytes = msg->msg_iter.count;
 		nr = msg->msg_iter.nr_segs;
 
-		ret = rxrpc_kernel_send_data(call->net->socket, call->rxcall, msg,
-					     bytes, afs_notify_end_request_tx);
+		ret = afs_send_data(call, msg, bytes);
 		for (loop = 0; loop < nr; loop++)
 			put_page(bv[loop].bv_page);
 		if (ret < 0)
@@ -321,9 +399,9 @@
 /*
  * initiate a call
  */
-int afs_make_call(struct sockaddr_rxrpc *srx, struct afs_call *call,
-		  gfp_t gfp, bool async)
+int afs_make_call(struct afs_call *call, gfp_t gfp, bool async)
 {
+	struct sockaddr_rxrpc *srx;
 	struct rxrpc_call *rxcall;
 	struct msghdr msg;
 	struct kvec iov[1];
@@ -332,7 +410,7 @@
 	u32 abort_code;
 	int ret;
 
-	_enter(",{%pISp},", &srx->transport);
+	_enter("");
 
 	ASSERT(call->type != NULL);
 	ASSERT(call->type->name != NULL);
@@ -354,6 +432,11 @@
 	}
 
 	/* create a call */
+	srx = afs_get_address(&call->cursor);
+	if (IS_ERR(srx))
+		return PTR_ERR(srx);
+
+	_debug("call %pISp", &srx->transport);
 	rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
 					 (unsigned long)call,
 					 tx_total_len, gfp,
@@ -380,16 +463,7 @@
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= (call->send_pages ? MSG_MORE : 0);
 
-	/* We have to change the state *before* sending the last packet as
-	 * rxrpc might give us the reply before it returns from sending the
-	 * request.  Further, if the send fails, we may already have been given
-	 * a notification and may have collected it.
-	 */
-	if (!call->send_pages)
-		call->state = AFS_CALL_AWAIT_REPLY;
-	ret = rxrpc_kernel_send_data(call->net->socket, rxcall,
-				     &msg, call->request_size,
-				     afs_notify_end_request_tx);
+	ret = afs_send_data(call, &msg, call->request_size);
 	if (ret < 0)
 		goto error_do_abort;
 
@@ -758,7 +832,6 @@
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
 
-	call->state = AFS_CALL_AWAIT_ACK;
 	switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0,
 				       afs_notify_end_reply_tx)) {
 	case 0:
@@ -798,7 +871,6 @@
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
 
-	call->state = AFS_CALL_AWAIT_ACK;
 	n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len,
 				   afs_notify_end_reply_tx);
 	if (n >= 0) {
@@ -816,6 +888,69 @@
 }
 
 /*
+ * Rotate the destination address if a network error of some sort occurs and
+ * retry the call.
+ */
+static int afs_retry_call(struct afs_call *call, int ret)
+{
+	enum rxrpc_call_completion compl;
+	struct sockaddr_rxrpc *srx;
+
+	if (ret == -ENOMEM ||
+	    ret == -ENONET ||
+	    ret == -EINTR ||
+	    ret == -EFAULT ||
+	    ret == -ERESTARTSYS ||
+	    ret == -EKEYEXPIRED ||
+	    ret == -EKEYREVOKED ||
+	    ret == -EKEYREJECTED ||
+	    ret == -EPERM)
+		return ret;
+
+	/* Check to see if it's an error that meant the call data packets never
+	 * reached the peer.
+	 */
+	call->error = rxrpc_kernel_check_call(call->net->socket, call->rxcall,
+					      &compl, &call->abort_code);
+	if (call->error == -EINPROGRESS)
+		return ret;
+
+	switch (compl) {
+	case RXRPC_CALL_SUCCEEDED:
+	default:
+		WARN_ONCE(true, "AFS: Call succeeded despite send-data failing\n");
+		return 0;
+
+	case RXRPC_CALL_REMOTELY_ABORTED:
+	case RXRPC_CALL_LOCALLY_ABORTED:
+		/* All of these indicate that we had some interaction with the
+		 * server, so there's no point trying another server.
+		 */
+		return call->error;
+
+	case RXRPC_CALL_LOCAL_ERROR:
+	case RXRPC_CALL_NETWORK_ERROR:
+		/* Local errors from an attempt to connect a call and network
+		 * errors reported back by ICMP suggest skipping the current
+		 * address and trying the next.
+		 */
+		break;
+	}
+
+	/* Rotate servers if possible. */
+	srx = afs_get_address(&call->cursor);
+	if (IS_ERR(srx))
+		return PTR_ERR(srx);
+
+	_debug("retry %pISp", &srx->transport);
+	call->error = 0;
+	ret = rxrpc_kernel_retry_call(call->net->socket, call->rxcall,
+				      srx, call->key);
+	_leave(" = %d [retry]", ret);
+	return ret;
+}
+
+/*
  * Extract a piece of data from the received data socket buffers.
  */
 int afs_extract_data(struct afs_call *call, void *buf, size_t count,
@@ -850,10 +985,15 @@
 		return 0;
 	}
 
-	if (ret == -ECONNABORTED)
+	if (ret == -ECONNABORTED) {
 		call->error = call->type->abort_to_error(call->abort_code);
-	else
-		call->error = ret;
+		goto out;
+	}
+
+	ret = afs_retry_call(call, ret);
+	if (ret == 0)
+		return -EAGAIN;
+out:
 	call->state = AFS_CALL_COMPLETE;
 	return ret;
 }
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 57c2f60..0f2e849 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -37,7 +37,9 @@
 		p = *pp;
 		_debug("- consider %p", p);
 		xserver = rb_entry(p, struct afs_server, master_rb);
-		diff = memcmp(&server->addr, &xserver->addr, sizeof(server->addr));
+		diff = memcmp(&server->addrs->addrs[0],
+			      &xserver->addrs->addrs[0],
+			      sizeof(server->addrs->addrs[0]));
 		if (diff < 0)
 			pp = &(*pp)->rb_left;
 		else if (diff > 0)
@@ -66,28 +68,41 @@
 	_enter("");
 
 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
-	if (server) {
-		atomic_set(&server->usage, 1);
-		server->net = cell->net;
-		server->cell = cell;
+	if (!server)
+		goto enomem;
+	server->addrs = kzalloc(sizeof(struct afs_addr_list) +
+				sizeof(struct sockaddr_rxrpc),
+				GFP_KERNEL);
+	if (!server->addrs)
+		goto enomem_server;
 
-		INIT_LIST_HEAD(&server->link);
-		INIT_LIST_HEAD(&server->grave);
-		init_rwsem(&server->sem);
-		spin_lock_init(&server->fs_lock);
-		server->fs_vnodes = RB_ROOT;
-		server->cb_promises = RB_ROOT;
-		spin_lock_init(&server->cb_lock);
-		init_waitqueue_head(&server->cb_break_waitq);
-		INIT_DELAYED_WORK(&server->cb_break_work,
-				  afs_dispatch_give_up_callbacks);
+	atomic_set(&server->usage, 1);
+	server->net = cell->net;
+	server->cell = cell;
 
-		server->addr = *addr;
-		_leave(" = %p{%d}", server, atomic_read(&server->usage));
-	} else {
-		_leave(" = NULL [nomem]");
-	}
+	INIT_LIST_HEAD(&server->link);
+	INIT_LIST_HEAD(&server->grave);
+	init_rwsem(&server->sem);
+	spin_lock_init(&server->fs_lock);
+	server->fs_vnodes = RB_ROOT;
+	server->cb_promises = RB_ROOT;
+	spin_lock_init(&server->cb_lock);
+	init_waitqueue_head(&server->cb_break_waitq);
+	INIT_DELAYED_WORK(&server->cb_break_work,
+			  afs_dispatch_give_up_callbacks);
+
+	refcount_set(&server->addrs->usage, 1);
+	server->addrs->nr_addrs = 1;
+	server->addrs->addrs[0] = *addr;
+
+	_leave(" = %p{%d}", server, atomic_read(&server->usage));
 	return server;
+
+enomem_server:
+	kfree(server);
+enomem:
+	_leave(" = NULL [nomem]");
+	return NULL;
 }
 
 /*
@@ -104,7 +119,7 @@
 	read_lock(&cell->servers_lock);
 
 	list_for_each_entry(server, &cell->servers, link) {
-		if (memcmp(&server->addr, addr, sizeof(*addr)) == 0)
+		if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0)
 			goto found_server_quickly;
 	}
 	read_unlock(&cell->servers_lock);
@@ -119,7 +134,7 @@
 
 	/* check the cell's server list again */
 	list_for_each_entry(server, &cell->servers, link) {
-		if (memcmp(&server->addr, addr, sizeof(*addr)) == 0)
+		if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0)
 			goto found_server;
 	}
 
@@ -187,7 +202,7 @@
 
 		_debug("- consider %p", p);
 
-		diff = memcmp(srx, &server->addr, sizeof(*srx));
+		diff = memcmp(srx, &server->addrs->addrs[0], sizeof(*srx));
 		if (diff < 0) {
 			p = p->rb_left;
 		} else if (diff > 0) {
@@ -256,6 +271,7 @@
 	ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0);
 
 	afs_put_cell(server->net, server->cell);
+	afs_put_addrlist(server->addrs);
 	kfree(server);
 }
 
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 276319a..54d02e5 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -156,8 +156,7 @@
 /*
  * dispatch a get volume entry by name operation
  */
-int afs_vl_get_entry_by_name(struct afs_net *net,
-			     struct sockaddr_rxrpc *addr,
+int afs_vl_get_entry_by_name(struct afs_cell *cell,
 			     struct key *key,
 			     const char *volname,
 			     struct afs_cache_vlocation *entry,
@@ -173,10 +172,13 @@
 	padsz = (4 - (volnamesz & 3)) & 3;
 	reqsz = 8 + volnamesz + padsz;
 
-	call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByName, reqsz, 384);
+	call = afs_alloc_flat_call(cell->net, &afs_RXVLGetEntryByName,
+				   reqsz, 384);
 	if (!call)
 		return -ENOMEM;
 
+	afs_set_vl_cursor(call, cell);
+
 	call->key = key;
 	call->reply = entry;
 
@@ -189,14 +191,13 @@
 		memset((void *) bp + volnamesz, 0, padsz);
 
 	/* initiate the call */
-	return afs_make_call(addr, call, GFP_KERNEL, async);
+	return afs_make_call(call, GFP_KERNEL, async);
 }
 
 /*
  * dispatch a get volume entry by ID operation
  */
-int afs_vl_get_entry_by_id(struct afs_net *net,
-			   struct sockaddr_rxrpc *addr,
+int afs_vl_get_entry_by_id(struct afs_cell *cell,
 			   struct key *key,
 			   afs_volid_t volid,
 			   afs_voltype_t voltype,
@@ -208,10 +209,12 @@
 
 	_enter("");
 
-	call = afs_alloc_flat_call(net, &afs_RXVLGetEntryById, 12, 384);
+	call = afs_alloc_flat_call(cell->net, &afs_RXVLGetEntryById, 12, 384);
 	if (!call)
 		return -ENOMEM;
 
+	afs_set_vl_cursor(call, cell);
+
 	call->key = key;
 	call->reply = entry;
 
@@ -222,5 +225,5 @@
 	*bp   = htonl(voltype);
 
 	/* initiate the call */
-	return afs_make_call(addr, call, GFP_KERNEL, async);
+	return afs_make_call(call, GFP_KERNEL, async);
 }
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index ec5ab8d..8c64a16 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -22,137 +22,6 @@
 static unsigned afs_vlocation_update_timeout = 10 * 60;
 
 /*
- * iterate through the VL servers in a cell until one of them admits knowing
- * about the volume in question
- */
-static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
-					   struct key *key,
-					   struct afs_cache_vlocation *vldb)
-{
-	struct afs_cell *cell = vl->cell;
-	int count, ret;
-
-	_enter("%s,%s", cell->name, vl->vldb.name);
-
-	down_write(&vl->cell->vl_sem);
-	ret = -ENOMEDIUM;
-	for (count = cell->vl_naddrs; count > 0; count--) {
-		struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix];
-
-		_debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport);
-
-		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_name(cell->net, addr, key,
-					       vl->vldb.name, vldb, false);
-		switch (ret) {
-		case 0:
-			goto out;
-		case -ENOMEM:
-		case -ENONET:
-		case -ENETUNREACH:
-		case -EHOSTUNREACH:
-		case -ECONNREFUSED:
-			if (ret == -ENOMEM || ret == -ENONET)
-				goto out;
-			goto rotate;
-		case -ENOMEDIUM:
-		case -EKEYREJECTED:
-		case -EKEYEXPIRED:
-			goto out;
-		default:
-			ret = -EIO;
-			goto rotate;
-		}
-
-		/* rotate the server records upon lookup failure */
-	rotate:
-		cell->vl_curr_svix++;
-		cell->vl_curr_svix %= cell->vl_naddrs;
-	}
-
-out:
-	up_write(&vl->cell->vl_sem);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * iterate through the VL servers in a cell until one of them admits knowing
- * about the volume in question
- */
-static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
-					 struct key *key,
-					 afs_volid_t volid,
-					 afs_voltype_t voltype,
-					 struct afs_cache_vlocation *vldb)
-{
-	struct afs_cell *cell = vl->cell;
-	int count, ret;
-
-	_enter("%s,%x,%d,", cell->name, volid, voltype);
-
-	down_write(&vl->cell->vl_sem);
-	ret = -ENOMEDIUM;
-	for (count = cell->vl_naddrs; count > 0; count--) {
-		struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix];
-
-		_debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport);
-
-		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_id(cell->net, addr, key, volid,
-					     voltype, vldb, false);
-		switch (ret) {
-		case 0:
-			goto out;
-		case -ENOMEM:
-		case -ENONET:
-		case -ENETUNREACH:
-		case -EHOSTUNREACH:
-		case -ECONNREFUSED:
-			if (ret == -ENOMEM || ret == -ENONET)
-				goto out;
-			goto rotate;
-		case -EBUSY:
-			vl->upd_busy_cnt++;
-			if (vl->upd_busy_cnt <= 3) {
-				if (vl->upd_busy_cnt > 1) {
-					/* second+ BUSY - sleep a little bit */
-					set_current_state(TASK_UNINTERRUPTIBLE);
-					schedule_timeout(1);
-				}
-				continue;
-			}
-			break;
-		case -ENOMEDIUM:
-			vl->upd_rej_cnt++;
-			goto rotate;
-		default:
-			ret = -EIO;
-			goto rotate;
-		}
-
-		/* rotate the server records upon lookup failure */
-	rotate:
-		cell->vl_curr_svix++;
-		cell->vl_curr_svix %= cell->vl_naddrs;
-		vl->upd_busy_cnt = 0;
-	}
-
-out:
-	if (ret < 0 && vl->upd_rej_cnt > 0) {
-		printk(KERN_NOTICE "kAFS:"
-		       " Active volume no longer valid '%s'\n",
-		       vl->vldb.name);
-		vl->valid = 0;
-		ret = -ENOMEDIUM;
-	}
-
-	up_write(&vl->cell->vl_sem);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
  * allocate a volume location record
  */
 static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
@@ -197,6 +66,7 @@
 	       vl->vldb.vid[1],
 	       vl->vldb.vid[2]);
 
+retry:
 	if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
 		vid = vl->vldb.vid[0];
 		voltype = AFSVL_RWVOL;
@@ -215,7 +85,8 @@
 	/* contact the server to make sure the volume is still available
 	 * - TODO: need to handle disconnected operation here
 	 */
-	ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
+	ret = afs_vl_get_entry_by_id(vl->cell, key, vid, voltype,
+				     vldb, false);
 	switch (ret) {
 		/* net error */
 	default:
@@ -239,6 +110,18 @@
 		/* TODO: make existing record unavailable */
 		_leave(" = %d", ret);
 		return ret;
+
+	case -EBUSY:
+		vl->upd_busy_cnt++;
+		if (vl->upd_busy_cnt <= 3) {
+			if (vl->upd_busy_cnt > 1) {
+				/* second+ BUSY - sleep a little bit */
+				set_current_state(TASK_UNINTERRUPTIBLE);
+				schedule_timeout(1);
+			}
+			goto retry;
+		}
+		return -EBUSY;
 	}
 }
 
@@ -278,7 +161,8 @@
 	memset(&vldb, 0, sizeof(vldb));
 
 	/* Try to look up an unknown volume in the cell VL databases by name */
-	ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
+	ret = afs_vl_get_entry_by_name(vl->cell, key, vl->vldb.name,
+				       &vldb, false);
 	if (ret < 0) {
 		printk("kAFS: failed to locate '%s' in cell '%s'\n",
 		       vl->vldb.name, vl->cell->name);
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 64834b20..8dcf492 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -354,8 +354,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %p{%pIS}",
-		       server, &server->addr.transport);
+		_debug("USING SERVER: %pISp", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_fetch_file_status(server, key, vnode, NULL,
 					       false);
@@ -418,7 +417,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pISp", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_fetch_data(server, key, vnode, desc,
 					false);
@@ -474,7 +473,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pISp", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_create(server, key, vnode, name, mode, newfid,
 				    newstatus, newcb, false);
@@ -530,7 +529,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pIS\n", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_remove(server, key, vnode, name, isdir,
 				    false);
@@ -592,7 +591,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pIS\n", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_link(server, key, dvnode, vnode, name,
 				  false);
@@ -656,7 +655,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pIS\n", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_symlink(server, key, vnode, name, content,
 				     newfid, newstatus, false);
@@ -726,7 +725,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pIS\n", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_rename(server, key, orig_dvnode, orig_name,
 				    new_dvnode, new_name, false);
@@ -792,7 +791,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pIS\n", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_store_data(server, wb, first, last, offset, to,
 					false);
@@ -845,7 +844,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pIS\n", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_setattr(server, key, vnode, attr, false);
 
@@ -892,7 +891,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pIS\n", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_get_volume_status(server, key, vnode, vs, false);
 
@@ -931,7 +930,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pIS\n", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_set_lock(server, key, vnode, type, false);
 
@@ -969,7 +968,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pIS\n", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_extend_lock(server, key, vnode, false);
 
@@ -1007,7 +1006,7 @@
 		if (IS_ERR(server))
 			goto no_server;
 
-		_debug("USING SERVER: %pIS\n", &server->addr.transport);
+		_debug("USING SERVER: %pIS\n", &server->addrs->addrs[0].transport);
 
 		ret = afs_fs_release_lock(server, key, vnode, false);
 
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index fbbb470..c0d4e97 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -249,7 +249,7 @@
 			afs_get_server(server);
 			up_read(&volume->server_sem);
 			_leave(" = %p (picked %pIS)",
-			       server, &server->addr.transport);
+			       server, &server->addrs->addrs[0].transport);
 			return server;
 
 		case -ENETUNREACH:
@@ -304,7 +304,8 @@
 	unsigned loop;
 
 	_enter("%s,%pIS,%d",
-	       volume->vlocation->vldb.name, &server->addr.transport, result);
+	       volume->vlocation->vldb.name, &server->addrs->addrs[0].transport,
+	       result);
 
 	switch (result) {
 		/* success */