| // SPDX-License-Identifier: GPL-2.0-or-later | 
 | /* AFS fileserver probing | 
 |  * | 
 |  * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved. | 
 |  * Written by David Howells (dhowells@redhat.com) | 
 |  */ | 
 |  | 
 | #include <linux/sched.h> | 
 | #include <linux/slab.h> | 
 | #include "afs_fs.h" | 
 | #include "internal.h" | 
 | #include "protocol_afs.h" | 
 | #include "protocol_yfs.h" | 
 |  | 
 | static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ; | 
 | static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ; | 
 |  | 
 | /* | 
 |  * Start the probe polling timer.  We have to supply it with an inc on the | 
 |  * outstanding server count. | 
 |  */ | 
 | static void afs_schedule_fs_probe(struct afs_net *net, | 
 | 				  struct afs_server *server, bool fast) | 
 | { | 
 | 	unsigned long atj; | 
 |  | 
 | 	if (!net->live) | 
 | 		return; | 
 |  | 
 | 	atj = server->probed_at; | 
 | 	atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval; | 
 |  | 
 | 	afs_inc_servers_outstanding(net); | 
 | 	if (timer_reduce(&net->fs_probe_timer, atj)) | 
 | 		afs_dec_servers_outstanding(net); | 
 | } | 
 |  | 
 | /* | 
 |  * Handle the completion of a set of probes. | 
 |  */ | 
 | static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server) | 
 | { | 
 | 	bool responded = server->probe.responded; | 
 |  | 
 | 	write_seqlock(&net->fs_lock); | 
 | 	if (responded) { | 
 | 		list_add_tail(&server->probe_link, &net->fs_probe_slow); | 
 | 	} else { | 
 | 		server->rtt = UINT_MAX; | 
 | 		clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags); | 
 | 		list_add_tail(&server->probe_link, &net->fs_probe_fast); | 
 | 	} | 
 | 	write_sequnlock(&net->fs_lock); | 
 |  | 
 | 	afs_schedule_fs_probe(net, server, !responded); | 
 | } | 
 |  | 
 | /* | 
 |  * Handle the completion of a probe. | 
 |  */ | 
 | static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server) | 
 | { | 
 | 	_enter(""); | 
 |  | 
 | 	if (atomic_dec_and_test(&server->probe_outstanding)) | 
 | 		afs_finished_fs_probe(net, server); | 
 |  | 
 | 	wake_up_all(&server->probe_wq); | 
 | } | 
 |  | 
 | /* | 
 |  * Handle inability to send a probe due to ENOMEM when trying to allocate a | 
 |  * call struct. | 
 |  */ | 
 | static void afs_fs_probe_not_done(struct afs_net *net, | 
 | 				  struct afs_server *server, | 
 | 				  struct afs_addr_cursor *ac) | 
 | { | 
 | 	struct afs_addr_list *alist = ac->alist; | 
 | 	unsigned int index = ac->index; | 
 |  | 
 | 	_enter(""); | 
 |  | 
 | 	trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail); | 
 | 	spin_lock(&server->probe_lock); | 
 |  | 
 | 	server->probe.local_failure = true; | 
 | 	if (server->probe.error == 0) | 
 | 		server->probe.error = -ENOMEM; | 
 |  | 
 | 	set_bit(index, &alist->failed); | 
 |  | 
 | 	spin_unlock(&server->probe_lock); | 
 | 	return afs_done_one_fs_probe(net, server); | 
 | } | 
 |  | 
 | /* | 
 |  * Process the result of probing a fileserver.  This is called after successful | 
 |  * or failed delivery of an FS.GetCapabilities operation. | 
 |  */ | 
 | void afs_fileserver_probe_result(struct afs_call *call) | 
 | { | 
 | 	struct afs_addr_list *alist = call->alist; | 
 | 	struct afs_server *server = call->server; | 
 | 	unsigned int index = call->addr_ix; | 
 | 	unsigned int rtt_us = 0, cap0; | 
 | 	int ret = call->error; | 
 |  | 
 | 	_enter("%pU,%u", &server->uuid, index); | 
 |  | 
 | 	spin_lock(&server->probe_lock); | 
 |  | 
 | 	switch (ret) { | 
 | 	case 0: | 
 | 		server->probe.error = 0; | 
 | 		goto responded; | 
 | 	case -ECONNABORTED: | 
 | 		if (!server->probe.responded) { | 
 | 			server->probe.abort_code = call->abort_code; | 
 | 			server->probe.error = ret; | 
 | 		} | 
 | 		goto responded; | 
 | 	case -ENOMEM: | 
 | 	case -ENONET: | 
 | 		clear_bit(index, &alist->responded); | 
 | 		server->probe.local_failure = true; | 
 | 		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); | 
 | 		goto out; | 
 | 	case -ECONNRESET: /* Responded, but call expired. */ | 
 | 	case -ERFKILL: | 
 | 	case -EADDRNOTAVAIL: | 
 | 	case -ENETUNREACH: | 
 | 	case -EHOSTUNREACH: | 
 | 	case -EHOSTDOWN: | 
 | 	case -ECONNREFUSED: | 
 | 	case -ETIMEDOUT: | 
 | 	case -ETIME: | 
 | 	default: | 
 | 		clear_bit(index, &alist->responded); | 
 | 		set_bit(index, &alist->failed); | 
 | 		if (!server->probe.responded && | 
 | 		    (server->probe.error == 0 || | 
 | 		     server->probe.error == -ETIMEDOUT || | 
 | 		     server->probe.error == -ETIME)) | 
 | 			server->probe.error = ret; | 
 | 		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); | 
 | 		goto out; | 
 | 	} | 
 |  | 
 | responded: | 
 | 	clear_bit(index, &alist->failed); | 
 |  | 
 | 	if (call->service_id == YFS_FS_SERVICE) { | 
 | 		server->probe.is_yfs = true; | 
 | 		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags); | 
 | 		alist->addrs[index].srx_service = call->service_id; | 
 | 	} else { | 
 | 		server->probe.not_yfs = true; | 
 | 		if (!server->probe.is_yfs) { | 
 | 			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); | 
 | 			alist->addrs[index].srx_service = call->service_id; | 
 | 		} | 
 | 		cap0 = ntohl(call->tmp); | 
 | 		if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES) | 
 | 			set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); | 
 | 		else | 
 | 			clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); | 
 | 	} | 
 |  | 
 | 	rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); | 
 | 	if (rtt_us < server->probe.rtt) { | 
 | 		server->probe.rtt = rtt_us; | 
 | 		server->rtt = rtt_us; | 
 | 		alist->preferred = index; | 
 | 	} | 
 |  | 
 | 	smp_wmb(); /* Set rtt before responded. */ | 
 | 	server->probe.responded = true; | 
 | 	set_bit(index, &alist->responded); | 
 | 	set_bit(AFS_SERVER_FL_RESPONDING, &server->flags); | 
 | out: | 
 | 	spin_unlock(&server->probe_lock); | 
 |  | 
 | 	_debug("probe %pU [%u] %pISpc rtt=%u ret=%d", | 
 | 	       &server->uuid, index, &alist->addrs[index].transport, | 
 | 	       rtt_us, ret); | 
 |  | 
 | 	return afs_done_one_fs_probe(call->net, server); | 
 | } | 
 |  | 
 | /* | 
 |  * Probe one or all of a fileserver's addresses to find out the best route and | 
 |  * to query its capabilities. | 
 |  */ | 
 | void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, | 
 | 			     struct key *key, bool all) | 
 | { | 
 | 	struct afs_addr_cursor ac = { | 
 | 		.index = 0, | 
 | 	}; | 
 |  | 
 | 	_enter("%pU", &server->uuid); | 
 |  | 
 | 	read_lock(&server->fs_lock); | 
 | 	ac.alist = rcu_dereference_protected(server->addresses, | 
 | 					     lockdep_is_held(&server->fs_lock)); | 
 | 	afs_get_addrlist(ac.alist); | 
 | 	read_unlock(&server->fs_lock); | 
 |  | 
 | 	server->probed_at = jiffies; | 
 | 	atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1); | 
 | 	memset(&server->probe, 0, sizeof(server->probe)); | 
 | 	server->probe.rtt = UINT_MAX; | 
 |  | 
 | 	ac.index = ac.alist->preferred; | 
 | 	if (ac.index < 0 || ac.index >= ac.alist->nr_addrs) | 
 | 		all = true; | 
 |  | 
 | 	if (all) { | 
 | 		for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) | 
 | 			if (!afs_fs_get_capabilities(net, server, &ac, key)) | 
 | 				afs_fs_probe_not_done(net, server, &ac); | 
 | 	} else { | 
 | 		if (!afs_fs_get_capabilities(net, server, &ac, key)) | 
 | 			afs_fs_probe_not_done(net, server, &ac); | 
 | 	} | 
 |  | 
 | 	afs_put_addrlist(ac.alist); | 
 | } | 
 |  | 
 | /* | 
 |  * Wait for the first as-yet untried fileserver to respond. | 
 |  */ | 
 | int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) | 
 | { | 
 | 	struct wait_queue_entry *waits; | 
 | 	struct afs_server *server; | 
 | 	unsigned int rtt = UINT_MAX, rtt_s; | 
 | 	bool have_responders = false; | 
 | 	int pref = -1, i; | 
 |  | 
 | 	_enter("%u,%lx", slist->nr_servers, untried); | 
 |  | 
 | 	/* Only wait for servers that have a probe outstanding. */ | 
 | 	for (i = 0; i < slist->nr_servers; i++) { | 
 | 		if (test_bit(i, &untried)) { | 
 | 			server = slist->servers[i].server; | 
 | 			if (!atomic_read(&server->probe_outstanding)) | 
 | 				__clear_bit(i, &untried); | 
 | 			if (server->probe.responded) | 
 | 				have_responders = true; | 
 | 		} | 
 | 	} | 
 | 	if (have_responders || !untried) | 
 | 		return 0; | 
 |  | 
 | 	waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL); | 
 | 	if (!waits) | 
 | 		return -ENOMEM; | 
 |  | 
 | 	for (i = 0; i < slist->nr_servers; i++) { | 
 | 		if (test_bit(i, &untried)) { | 
 | 			server = slist->servers[i].server; | 
 | 			init_waitqueue_entry(&waits[i], current); | 
 | 			add_wait_queue(&server->probe_wq, &waits[i]); | 
 | 		} | 
 | 	} | 
 |  | 
 | 	for (;;) { | 
 | 		bool still_probing = false; | 
 |  | 
 | 		set_current_state(TASK_INTERRUPTIBLE); | 
 | 		for (i = 0; i < slist->nr_servers; i++) { | 
 | 			if (test_bit(i, &untried)) { | 
 | 				server = slist->servers[i].server; | 
 | 				if (server->probe.responded) | 
 | 					goto stop; | 
 | 				if (atomic_read(&server->probe_outstanding)) | 
 | 					still_probing = true; | 
 | 			} | 
 | 		} | 
 |  | 
 | 		if (!still_probing || signal_pending(current)) | 
 | 			goto stop; | 
 | 		schedule(); | 
 | 	} | 
 |  | 
 | stop: | 
 | 	set_current_state(TASK_RUNNING); | 
 |  | 
 | 	for (i = 0; i < slist->nr_servers; i++) { | 
 | 		if (test_bit(i, &untried)) { | 
 | 			server = slist->servers[i].server; | 
 | 			rtt_s = READ_ONCE(server->rtt); | 
 | 			if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) && | 
 | 			    rtt_s < rtt) { | 
 | 				pref = i; | 
 | 				rtt = rtt_s; | 
 | 			} | 
 |  | 
 | 			remove_wait_queue(&server->probe_wq, &waits[i]); | 
 | 		} | 
 | 	} | 
 |  | 
 | 	kfree(waits); | 
 |  | 
 | 	if (pref == -1 && signal_pending(current)) | 
 | 		return -ERESTARTSYS; | 
 |  | 
 | 	if (pref >= 0) | 
 | 		slist->preferred = pref; | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* | 
 |  * Probe timer.  We have an increment on fs_outstanding that we need to pass | 
 |  * along to the work item. | 
 |  */ | 
 | void afs_fs_probe_timer(struct timer_list *timer) | 
 | { | 
 | 	struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer); | 
 |  | 
 | 	if (!net->live || !queue_work(afs_wq, &net->fs_prober)) | 
 | 		afs_dec_servers_outstanding(net); | 
 | } | 
 |  | 
 | /* | 
 |  * Dispatch a probe to a server. | 
 |  */ | 
 | static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all) | 
 | 	__releases(&net->fs_lock) | 
 | { | 
 | 	struct key *key = NULL; | 
 |  | 
 | 	/* We remove it from the queues here - it will be added back to | 
 | 	 * one of the queues on the completion of the probe. | 
 | 	 */ | 
 | 	list_del_init(&server->probe_link); | 
 |  | 
 | 	afs_get_server(server, afs_server_trace_get_probe); | 
 | 	write_sequnlock(&net->fs_lock); | 
 |  | 
 | 	afs_fs_probe_fileserver(net, server, key, all); | 
 | 	afs_put_server(net, server, afs_server_trace_put_probe); | 
 | } | 
 |  | 
 | /* | 
 |  * Probe a server immediately without waiting for its due time to come | 
 |  * round.  This is used when all of the addresses have been tried. | 
 |  */ | 
 | void afs_probe_fileserver(struct afs_net *net, struct afs_server *server) | 
 | { | 
 | 	write_seqlock(&net->fs_lock); | 
 | 	if (!list_empty(&server->probe_link)) | 
 | 		return afs_dispatch_fs_probe(net, server, true); | 
 | 	write_sequnlock(&net->fs_lock); | 
 | } | 
 |  | 
 | /* | 
 |  * Probe dispatcher to regularly dispatch probes to keep NAT alive. | 
 |  */ | 
 | void afs_fs_probe_dispatcher(struct work_struct *work) | 
 | { | 
 | 	struct afs_net *net = container_of(work, struct afs_net, fs_prober); | 
 | 	struct afs_server *fast, *slow, *server; | 
 | 	unsigned long nowj, timer_at, poll_at; | 
 | 	bool first_pass = true, set_timer = false; | 
 |  | 
 | 	if (!net->live) { | 
 | 		afs_dec_servers_outstanding(net); | 
 | 		return; | 
 | 	} | 
 |  | 
 | 	_enter(""); | 
 |  | 
 | 	if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) { | 
 | 		afs_dec_servers_outstanding(net); | 
 | 		_leave(" [none]"); | 
 | 		return; | 
 | 	} | 
 |  | 
 | again: | 
 | 	write_seqlock(&net->fs_lock); | 
 |  | 
 | 	fast = slow = server = NULL; | 
 | 	nowj = jiffies; | 
 | 	timer_at = nowj + MAX_JIFFY_OFFSET; | 
 |  | 
 | 	if (!list_empty(&net->fs_probe_fast)) { | 
 | 		fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link); | 
 | 		poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval; | 
 | 		if (time_before(nowj, poll_at)) { | 
 | 			timer_at = poll_at; | 
 | 			set_timer = true; | 
 | 			fast = NULL; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	if (!list_empty(&net->fs_probe_slow)) { | 
 | 		slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link); | 
 | 		poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval; | 
 | 		if (time_before(nowj, poll_at)) { | 
 | 			if (time_before(poll_at, timer_at)) | 
 | 			    timer_at = poll_at; | 
 | 			set_timer = true; | 
 | 			slow = NULL; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	server = fast ?: slow; | 
 | 	if (server) | 
 | 		_debug("probe %pU", &server->uuid); | 
 |  | 
 | 	if (server && (first_pass || !need_resched())) { | 
 | 		afs_dispatch_fs_probe(net, server, server == fast); | 
 | 		first_pass = false; | 
 | 		goto again; | 
 | 	} | 
 |  | 
 | 	write_sequnlock(&net->fs_lock); | 
 |  | 
 | 	if (server) { | 
 | 		if (!queue_work(afs_wq, &net->fs_prober)) | 
 | 			afs_dec_servers_outstanding(net); | 
 | 		_leave(" [requeue]"); | 
 | 	} else if (set_timer) { | 
 | 		if (timer_reduce(&net->fs_probe_timer, timer_at)) | 
 | 			afs_dec_servers_outstanding(net); | 
 | 		_leave(" [timer]"); | 
 | 	} else { | 
 | 		afs_dec_servers_outstanding(net); | 
 | 		_leave(" [quiesce]"); | 
 | 	} | 
 | } | 
 |  | 
 | /* | 
 |  * Wait for a probe on a particular fileserver to complete for 2s. | 
 |  */ | 
 | int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr) | 
 | { | 
 | 	struct wait_queue_entry wait; | 
 | 	unsigned long timo = 2 * HZ; | 
 |  | 
 | 	if (atomic_read(&server->probe_outstanding) == 0) | 
 | 		goto dont_wait; | 
 |  | 
 | 	init_wait_entry(&wait, 0); | 
 | 	for (;;) { | 
 | 		prepare_to_wait_event(&server->probe_wq, &wait, | 
 | 				      is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); | 
 | 		if (timo == 0 || | 
 | 		    server->probe.responded || | 
 | 		    atomic_read(&server->probe_outstanding) == 0 || | 
 | 		    (is_intr && signal_pending(current))) | 
 | 			break; | 
 | 		timo = schedule_timeout(timo); | 
 | 	} | 
 |  | 
 | 	finish_wait(&server->probe_wq, &wait); | 
 |  | 
 | dont_wait: | 
 | 	if (server->probe.responded) | 
 | 		return 0; | 
 | 	if (is_intr && signal_pending(current)) | 
 | 		return -ERESTARTSYS; | 
 | 	if (timo == 0) | 
 | 		return -ETIME; | 
 | 	return -EDESTADDRREQ; | 
 | } | 
 |  | 
 | /* | 
 |  * Clean up the probing when the namespace is killed off. | 
 |  */ | 
 | void afs_fs_probe_cleanup(struct afs_net *net) | 
 | { | 
 | 	if (del_timer_sync(&net->fs_probe_timer)) | 
 | 		afs_dec_servers_outstanding(net); | 
 | } |