From 2b188cc1bb857a9d4701ae59aa7768b5124e262e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 7 Jan 2019 10:46:33 -0700
Subject: [PATCH] BACKPORT: Add io_uring IO interface

The submission queue (SQ) and completion queue (CQ) rings are shared
between the application and the kernel. This eliminates the need to
copy data back and forth to submit and complete IO.

IO submissions use the io_uring_sqe data structure, and completions
are generated in the form of io_uring_cqe data structures. The SQ
ring is an index into the io_uring_sqe array, which makes it possible
to submit a batch of IOs without them being contiguous in the ring.
The CQ ring is always contiguous, as completion events are inherently
unordered, and hence any io_uring_cqe entry can point back to an
arbitrary submission.
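
For reference, the two structures look roughly like the sketch below.
This is taken from the upstream commit's include/uapi/linux/io_uring.h,
which is not part of this excerpt, so treat the exact layout as
illustrative rather than authoritative for the backport:

struct io_uring_sqe {
	__u8	opcode;		/* type of operation for this sqe */
	__u8	flags;		/* as of now unused */
	__u16	ioprio;		/* ioprio for the request */
	__s32	fd;		/* file descriptor to do IO on */
	__u64	off;		/* offset into file */
	__u64	addr;		/* pointer to buffer or iovecs */
	__u32	len;		/* buffer size or number of iovecs */
	union {
		__kernel_rwf_t	rw_flags;
		__u32		__resv;
	};
	__u64	user_data;	/* data to be passed back at completion time */
	__u64	__pad2[3];
};

struct io_uring_cqe {
	__u64	user_data;	/* sqe->user_data, passed back verbatim */
	__s32	res;		/* result code for this event */
	__u32	flags;
};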
Two new system calls are added for this:

io_uring_setup(entries, params)
	Sets up an io_uring instance for doing async IO. On success,
	returns a file descriptor that the application can mmap to
	gain access to the SQ ring, CQ ring, and io_uring_sqes.

io_uring_enter(fd, to_submit, min_complete, flags, sigset, sigsetsize)
	Initiates IO against the rings mapped to this fd, or waits for
	them to complete, or both. The behavior is controlled by the
	parameters passed in. If 'to_submit' is non-zero, then we'll
	try to submit new IO. If IORING_ENTER_GETEVENTS is set, the
	kernel will wait for 'min_complete' events, if they aren't
	already available. It's valid to set IORING_ENTER_GETEVENTS
	and 'min_complete' == 0 at the same time; this allows the
	kernel to return already completed events without waiting
	for them. This is useful only for polling, as for IRQ
	driven IO, the application can just check the CQ ring
	without entering the kernel.
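
As an illustration only (not part of this patch), a minimal userspace
sequence might look like the sketch below. It assumes the uapi header
added elsewhere in this series (struct io_uring_params, struct
io_uring_sqe, the IORING_OFF_* mmap offsets, IORING_ENTER_GETEVENTS),
and invokes the syscalls by the numbers this patch wires up (425/426),
since libc has no wrappers yet:

#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>	/* added by this series, not in this excerpt */

#ifndef __NR_io_uring_setup
#define __NR_io_uring_setup	425	/* numbers from the tables below */
#endif
#ifndef __NR_io_uring_enter
#define __NR_io_uring_enter	426
#endif

int main(void)
{
	struct io_uring_params p;
	void *sq_ring, *sqes;
	int fd;

	memset(&p, 0, sizeof(p));

	/* Ask for a ring with 8 SQ entries; the kernel fills in p. */
	fd = syscall(__NR_io_uring_setup, 8, &p);
	if (fd < 0)
		return 1;

	/* Map the SQ ring and the io_uring_sqe array; the CQ ring is
	 * mapped the same way with IORING_OFF_CQ_RING. */
	sq_ring = mmap(NULL, p.sq_off.array + p.sq_entries * sizeof(__u32),
		       PROT_READ | PROT_WRITE, MAP_SHARED, fd,
		       IORING_OFF_SQ_RING);
	sqes = mmap(NULL, p.sq_entries * sizeof(struct io_uring_sqe),
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd,
		    IORING_OFF_SQES);
	if (sq_ring == MAP_FAILED || sqes == MAP_FAILED)
		return 1;

	/* ... fill an sqe via the mappings and advance the SQ tail ... */

	/* Submit one IO and wait for one completion in a single call. */
	syscall(__NR_io_uring_enter, fd, 1, 1, IORING_ENTER_GETEVENTS,
		NULL, 0);
	return 0;
}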
With this setup, it's possible to do async IO with a single system
call. Future developments will enable polled IO with this interface,
and polled submission as well. The latter will enable an application
to do IO without doing ANY system calls at all.

For IRQ driven IO, an application only needs to enter the kernel for
completions if it wants to wait for them to occur.

Each io_uring is backed by a workqueue, to support buffered async IO
as well. We will only punt to an async context if the command would
need to wait for IO on the device side. Any data that can be accessed
directly in the page cache is done inline. This avoids the slowness
issue of usual threadpools, since cached data is accessed as quickly
as a sync interface.

Sample application: http://git.kernel.dk/cgit/fio/plain/t/io_uring.c

dgreid - Also pull in the ARM and ARM64 changes, which were added in
a separate and only partially related commit.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
--- a/arch/arm/tools/syscall.tbl 2017-12-28 15:53:02.000000000 +0000
+++ b/arch/arm/tools/syscall.tbl 2020-10-30 03:59:25.357442520 +0000
@@ -412,3 +412,5 @@
395 common pkey_alloc sys_pkey_alloc
396 common pkey_free sys_pkey_free
397 common statx sys_statx
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
--- a/arch/arm64/include/asm/unistd32.h 2017-12-28 15:53:02.000000000 +0000
+++ b/arch/arm64/include/asm/unistd32.h 2020-10-30 04:02:43.711946882 +0000
@@ -817,6 +817,10 @@
__SYSCALL(__NR_pkey_free, sys_pkey_free)
#define __NR_statx 397
__SYSCALL(__NR_statx, sys_statx)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
--- a/arch/arm64/include/asm/unistd.h 2017-12-28 15:53:02.000000000 +0000
+++ b/arch/arm64/include/asm/unistd.h 2020-10-30 04:02:56.152354901 +0000
@@ -43,7 +43,7 @@
#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
-#define __NR_compat_syscalls 398
+#define __NR_compat_syscalls 427
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
--- a/arch/x86/entry/syscalls/syscall_32.tbl 2017-12-28 15:53:02.000000000 +0000
+++ b/arch/x86/entry/syscalls/syscall_32.tbl 2020-10-30 03:58:02.198716480 +0000
@@ -391,3 +391,5 @@
382 i386 pkey_free sys_pkey_free
383 i386 statx sys_statx
384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
+425 i386 io_uring_setup sys_io_uring_setup __ia32_sys_io_uring_setup
+426 i386 io_uring_enter sys_io_uring_enter __ia32_sys_io_uring_enter
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
--- a/arch/x86/entry/syscalls/syscall_64.tbl 2017-12-28 15:53:02.000000000 +0000
+++ b/arch/x86/entry/syscalls/syscall_64.tbl 2020-10-30 03:49:50.902629025 +0000
@@ -339,6 +339,8 @@
330 common pkey_alloc sys_pkey_alloc
331 common pkey_free sys_pkey_free
332 common statx sys_statx
+425 common io_uring_setup __x64_sys_io_uring_setup
+426 common io_uring_enter __x64_sys_io_uring_enter
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 257cccba3062..3072dbaa7869 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -67,6 +67,7 @@ struct file_handle;
struct file_handle;
struct sigaltstack;
union bpf_attr;
+struct io_uring_params;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -543,6 +544,11 @@ asmlinkage long sys_io_submit(aio_context_t, long,
struct iocb __user * __user *);
asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
struct io_event __user *result);
+asmlinkage long sys_io_uring_setup(u32 entries,
+ struct io_uring_params __user *p);
+asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
+ u32 min_complete, u32 flags,
+ const sigset_t __user *sig, size_t sigsz);
asmlinkage long sys_sendfile(int out_fd, int in_fd,
off_t __user *offset, size_t count);
asmlinkage long sys_sendfile64(int out_fd, int in_fd,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index d90127298f12..87871e7b7ea7 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -732,9 +732,13 @@ __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
__SYSCALL(__NR_pkey_free, sys_pkey_free)
#define __NR_statx 291
__SYSCALL(__NR_statx, sys_statx)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
#undef __NR_syscalls
-#define __NR_syscalls 292
+#define __NR_syscalls 427
/*
* 32 bit systems traditionally used different
--
2.20.1