movefile: support in-kernel file copying on Linux (bug 607868)
Perform in-kernel file copying when possible, and also support
reflinks and sparse files. If the optimized implementation
fails at runtime, gracefully fall back to a plain read/write
loop.
Compile-time and run-time fallbacks are implemented, so that
any incompatibilities will be handled gracefully. For example,
if the code is compiled on a system that supports the
copy_file_range syscall, but at run-time an older kernel that
does not support this syscall is detected, it will be handled
gracefully. There are similar fallbacks for lack of lseek
SEEK_DATA and sendfile support.
X-Gentoo-Bug: 607868
X-Gentoo-Bug-Url: https://bugs.gentoo.org/show_bug.cgi?id=607868
Acked-by: Brian Dolbec <dolsen@gentoo.org>
diff --git a/pym/portage/tests/util/file_copy/__init__.py b/pym/portage/tests/util/file_copy/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/pym/portage/tests/util/file_copy/__init__.py
diff --git a/pym/portage/tests/util/file_copy/__test__.py b/pym/portage/tests/util/file_copy/__test__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/pym/portage/tests/util/file_copy/__test__.py
diff --git a/pym/portage/tests/util/file_copy/test_copyfile.py b/pym/portage/tests/util/file_copy/test_copyfile.py
new file mode 100644
index 0000000..b900fde
--- /dev/null
+++ b/pym/portage/tests/util/file_copy/test_copyfile.py
@@ -0,0 +1,71 @@
+# Copyright 2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import shutil
+import tempfile
+
+from portage import os
+from portage.tests import TestCase
+from portage.checksum import perform_md5
+from portage.util.file_copy import copyfile
+
+
+class CopyFileTestCase(TestCase):
+
+ def testCopyFile(self):
+
+ tempdir = tempfile.mkdtemp()
+ try:
+ src_path = os.path.join(tempdir, 'src')
+ dest_path = os.path.join(tempdir, 'dest')
+ content = b'foo'
+
+ with open(src_path, 'wb') as f:
+ f.write(content)
+
+ copyfile(src_path, dest_path)
+
+ self.assertEqual(perform_md5(src_path), perform_md5(dest_path))
+ finally:
+ shutil.rmtree(tempdir)
+
+
+class CopyFileSparseTestCase(TestCase):
+
+ def testCopyFileSparse(self):
+
+ tempdir = tempfile.mkdtemp()
+ try:
+ src_path = os.path.join(tempdir, 'src')
+ dest_path = os.path.join(tempdir, 'dest')
+ content = b'foo'
+
+ # Use seek to create some sparse blocks. Don't make these
+ # files too big, in case the filesystem doesn't support
+ # sparse files.
+ with open(src_path, 'wb') as f:
+ f.write(content)
+ f.seek(2**17, 1)
+ f.write(content)
+ f.seek(2**18, 1)
+ f.write(content)
+ # Test that sparse blocks are handled correctly at
+ # the end of the file (involves seek and truncate).
+ f.seek(2**17, 1)
+
+ copyfile(src_path, dest_path)
+
+ self.assertEqual(perform_md5(src_path), perform_md5(dest_path))
+
+ # This last part of the test is expected to fail when sparse
+ # copy is not implemented, so set the todo flag in order
+ # to tolerate failures.
+ self.todo = True
+
+ # If sparse blocks were preserved, then both files should
+ # consume the same number of blocks.
+ self.assertEqual(
+ os.stat(src_path).st_blocks,
+ os.stat(dest_path).st_blocks)
+ finally:
+ shutil.rmtree(tempdir)
diff --git a/pym/portage/util/file_copy/__init__.py b/pym/portage/util/file_copy/__init__.py
new file mode 100644
index 0000000..3d9b745
--- /dev/null
+++ b/pym/portage/util/file_copy/__init__.py
@@ -0,0 +1,36 @@
+# Copyright 2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import os
+import shutil
+import tempfile
+
+try:
+ from portage.util.file_copy.reflink_linux import file_copy as _file_copy
+except ImportError:
+ _file_copy = None
+
+
+def _optimized_copyfile(src, dst):
+ """
+ Copy the contents (no metadata) of the file named src to a file
+ named dst.
+
+ If possible, copying is done within the kernel, and uses
+ "copy acceleration" techniques (such as reflinks). This also
+ supports sparse files.
+
+ @param src: path of source file
+ @type src: str
+ @param dst: path of destination file
+ @type dst: str
+ """
+ with open(src, 'rb', buffering=0) as src_file, \
+ open(dst, 'wb', buffering=0) as dst_file:
+ _file_copy(src_file.fileno(), dst_file.fileno())
+
+
+if _file_copy is None:
+ copyfile = shutil.copyfile
+else:
+ copyfile = _optimized_copyfile
diff --git a/pym/portage/util/movefile.py b/pym/portage/util/movefile.py
index 4be1c3b..37c809e 100644
--- a/pym/portage/util/movefile.py
+++ b/pym/portage/util/movefile.py
@@ -8,7 +8,6 @@
import errno
import fnmatch
import os as _os
-import shutil as _shutil
import stat
import sys
import textwrap
@@ -23,6 +22,8 @@
from portage.process import spawn
from portage.util import writemsg
from portage.util._xattr import xattr
+from portage.util.file_copy import copyfile
+
def _apply_stat(src_stat, dest):
_os.chown(dest, src_stat.st_uid, src_stat.st_gid)
@@ -114,7 +115,7 @@
_copyfile = selinux.copyfile
_rename = selinux.rename
else:
- _copyfile = _shutil.copyfile
+ _copyfile = copyfile
_rename = _os.rename
lchown = _unicode_func_wrapper(portage.data.lchown, encoding=encoding)
diff --git a/setup.py b/setup.py
index a346bd4..b624767 100755
--- a/setup.py
+++ b/setup.py
@@ -23,6 +23,7 @@
import glob
import os
import os.path
+import platform
import re
import subprocess
import sys
@@ -54,6 +55,14 @@
],
}
+if platform.system() == 'Linux':
+ x_c_helpers.update({
+ 'portage.util.file_copy.reflink_linux': [
+ 'src/portage_util_file_copy_reflink_linux.c',
+ ],
+ })
+
+
class x_build(build):
""" Build command with extra build_man call. """
diff --git a/src/portage_util_file_copy_reflink_linux.c b/src/portage_util_file_copy_reflink_linux.c
new file mode 100644
index 0000000..b031d96
--- /dev/null
+++ b/src/portage_util_file_copy_reflink_linux.c
@@ -0,0 +1,385 @@
+/* Copyright 2017 Gentoo Foundation
+ * Distributed under the terms of the GNU General Public License v2
+ */
+
+#include <Python.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <sys/sendfile.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+static PyObject * _reflink_linux_file_copy(PyObject *, PyObject *);
+
+/* Method table of the reflink_linux module. It exposes a single
+ * function, file_copy, which is implemented by
+ * _reflink_linux_file_copy and receives positional arguments
+ * (METH_VARARGS).
+ */
+static PyMethodDef reflink_linuxMethods[] = {
+ {
+ "file_copy",
+ _reflink_linux_file_copy,
+ METH_VARARGS,
+ "Copy between two file descriptors, "
+ "with reflink and sparse file support."
+ },
+ /* Sentinel entry which terminates the table. */
+ {NULL, NULL, 0, NULL}
+};
+
+#if PY_MAJOR_VERSION >= 3
+/* Python 3: describe the module with a PyModuleDef structure. */
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "reflink_linux",                            /* m_name */
+    "Module for reflink_linux copy operations", /* m_doc */
+    -1,                                         /* m_size */
+    reflink_linuxMethods,                       /* m_methods */
+    NULL,                                       /* m_reload */
+    NULL,                                       /* m_traverse */
+    NULL,                                       /* m_clear */
+    NULL,                                       /* m_free */
+};
+
+/* Python 3 entry point: create the module object and return it
+ * (NULL is returned if creation failed).
+ */
+PyMODINIT_FUNC
+PyInit_reflink_linux(void)
+{
+    return PyModule_Create(&moduledef);
+}
+#else
+/* Python 2 entry point: register the module and its method table. */
+PyMODINIT_FUNC
+initreflink_linux(void)
+{
+    Py_InitModule("reflink_linux", reflink_linuxMethods);
+}
+#endif
+
+
+/**
+ * cfr_wrapper - A copy_file_range syscall wrapper function, having a
+ * function signature that is compatible with sendfile.
+ * @fd_out: output file descriptor
+ * @fd_in: input file descriptor
+ * @off_out: offset of the output file
+ * @len: number of bytes to copy between the file descriptors
+ *
+ * The syscall is invoked with NULL as the input offset argument and
+ * with 0 as the flags argument.
+ *
+ * Return: Number of bytes written to fd_out on success, -1 on failure
+ * (errno is set appropriately). If __NR_copy_file_range was not
+ * defined at compile time, then this always returns -1 with errno set
+ * to ENOSYS.
+ */
+static ssize_t
+cfr_wrapper(int fd_out, int fd_in, loff_t *off_out, size_t len)
+{
+#ifdef __NR_copy_file_range
+ /* Note the argument order: the syscall takes the input file
+ * descriptor first, while this wrapper takes the output first.
+ */
+ return syscall(__NR_copy_file_range, fd_in, NULL, fd_out,
+ off_out, len, 0);
+#else
+ /* This is how it fails at runtime when the syscall is not supported. */
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+/**
+ * do_lseek_data - Adjust file offsets to the next location containing
+ * data, creating sparse empty blocks in the output file as needed.
+ * @fd_out: output file descriptor
+ * @fd_in: input file descriptor
+ * @off_out: offset of the output file
+ *
+ * Use lseek SEEK_DATA to adjust the fd_in file offset to the next
+ * location containing data, and adjust the fd_out file offset and
+ * off_out to the same location (creating sparse empty blocks as
+ * needed). On success, both fd_in and fd_out file offsets are
+ * guaranteed to be exactly equal to the value that off_out points to.
+ *
+ * Return: On success, the number of bytes to copy before the next hole,
+ * and -1 on failure (errno is set appropriately). Returns 0 when fd_in
+ * reaches EOF. If SEEK_DATA was not defined at compile time, then this
+ * always returns -1 with errno set to EINVAL.
+ */
+static off_t
+do_lseek_data(int fd_out, int fd_in, loff_t *off_out) {
+#ifdef SEEK_DATA
+ /* Use lseek SEEK_DATA/SEEK_HOLE for sparse file support,
+ * as suggested in the copy_file_range man page.
+ */
+ off_t offset_data, offset_hole;
+
+ offset_data = lseek(fd_in, *off_out, SEEK_DATA);
+ if (offset_data < 0) {
+ if (errno == ENXIO) {
+ /* EOF - If the file ends with a hole, then use lseek SEEK_END
+ * to find the end offset, and create sparse empty blocks in
+ * the output file. It's the caller's responsibility to
+ * truncate the file.
+ */
+ offset_hole = lseek(fd_in, 0, SEEK_END);
+ if (offset_hole < 0) {
+ return -1;
+ } else if (offset_hole != *off_out) {
+ if (lseek(fd_out, offset_hole, SEEK_SET) < 0) {
+ return -1;
+ }
+ *off_out = offset_hole;
+ }
+ return 0;
+ }
+ /* Any error other than ENXIO is reported to the caller. */
+ return -1;
+ }
+
+ /* Create sparse empty blocks in the output file, up
+ * until the next location that will contain data.
+ */
+ if (offset_data != *off_out) {
+ if (lseek(fd_out, offset_data, SEEK_SET) < 0) {
+ return -1;
+ }
+ *off_out = offset_data;
+ }
+
+ /* Locate the next hole, so that we know when to
+ * stop copying. There is an implicit hole at the
+ * end of the file. This should never result in ENXIO
+ * after SEEK_DATA has succeeded above.
+ */
+ offset_hole = lseek(fd_in, offset_data, SEEK_HOLE);
+ if (offset_hole < 0) {
+ return -1;
+ }
+
+ /* Revert SEEK_HOLE offset change, since we're going
+ * to copy the data that comes before the hole.
+ */
+ if (lseek(fd_in, offset_data, SEEK_SET) < 0) {
+ return -1;
+ }
+
+ /* Length of the data segment that starts at offset_data. */
+ return offset_hole - offset_data;
+#else
+ /* This is how it fails at runtime when lseek SEEK_DATA is not supported. */
+ errno = EINVAL;
+ return -1;
+#endif
+}
+
+
+/**
+ * _reflink_linux_file_copy - Copy between two file descriptors, with
+ * reflink and sparse file support.
+ * @fd_in: input file descriptor
+ * @fd_out: output file descriptor
+ *
+ * When supported, this uses copy_file_range for reflink support,
+ * and lseek SEEK_DATA for sparse file support. It has graceful
+ * fallbacks when support is unavailable for copy_file_range, lseek
+ * SEEK_DATA, or sendfile operations. When all else fails, it uses
+ * a plain read/write loop that works in any kernel version.
+ *
+ * If a syscall is interrupted by a signal, then the function will
+ * automatically resume copying at the appropriate location which is
+ * tracked internally by the offset_out variable. If a Python signal
+ * handler raises an exception, then copying stops and that exception
+ * is propagated to the caller.
+ *
+ * Return: The length of the output file on success. Raise OSError
+ * on failure.
+ */
+static PyObject *
+_reflink_linux_file_copy(PyObject *self, PyObject *args)
+{
+    int eintr_retry, error, fd_in, fd_out, stat_in_acquired, stat_out_acquired;
+    int lseek_works, sendfile_works, signal_raised;
+    off_t offset_out, len;
+    ssize_t buf_bytes, buf_offset, copyfunc_ret;
+    struct stat stat_in, stat_out;
+    char* buf;
+    ssize_t (*copyfunc)(int, int, loff_t *, size_t);
+
+    if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out))
+        return NULL;
+
+    eintr_retry = 1;
+    error = 0;
+    signal_raised = 0;
+    offset_out = 0;
+    stat_in_acquired = 0;
+    stat_out_acquired = 0;
+    buf = NULL;
+    buf_bytes = 0;
+    buf_offset = 0;
+    copyfunc = cfr_wrapper;
+    lseek_works = 1;
+    sendfile_works = 1;
+
+    while (eintr_retry) {
+
+        Py_BEGIN_ALLOW_THREADS
+
+        /* Linux 3.1 and later support SEEK_DATA (for sparse file support).
+         * This code uses copy_file_range if possible, and falls back to
+         * sendfile for cross-device or when the copy_file_range syscall
+         * is not available (less than Linux 4.5). This will fail for
+         * Linux less than 3.1, which does not support the lseek SEEK_DATA
+         * parameter.
+         */
+        if (sendfile_works && lseek_works) {
+            error = 0;
+
+            while (1) {
+                len = do_lseek_data(fd_out, fd_in, &offset_out);
+                if (!len) {
+                    /* EOF */
+                    break;
+                } else if (len < 0) {
+                    error = errno;
+                    if (errno == EINVAL && !offset_out) {
+                        lseek_works = 0;
+                    }
+                    break;
+                }
+
+                /* For the copyfunc call, the fd_in file offset must be
+                 * exactly equal to offset_out. The above do_lseek_data
+                 * function guarantees correct state.
+                 */
+                copyfunc_ret = copyfunc(fd_out,
+                                        fd_in,
+                                        &offset_out,
+                                        len);
+
+                if (copyfunc_ret < 0) {
+                    error = errno;
+                    if ((errno == EXDEV || errno == ENOSYS) &&
+                        copyfunc == cfr_wrapper) {
+                        /* Use sendfile instead of copy_file_range for
+                         * cross-device copies, or when the copy_file_range
+                         * syscall is not available (less than Linux 4.5).
+                         */
+                        error = 0;
+                        copyfunc = sendfile;
+                        copyfunc_ret = copyfunc(fd_out,
+                                                fd_in,
+                                                &offset_out,
+                                                len);
+
+                        if (copyfunc_ret < 0) {
+                            error = errno;
+                            /* On Linux, if lseek succeeded above, then
+                             * sendfile should have worked here too, so
+                             * don't bother to fallback for EINVAL here.
+                             */
+                            break;
+                        }
+                    } else {
+                        break;
+                    }
+                }
+            }
+        }
+
+        /* Less than Linux 3.1 does not support SEEK_DATA or copy_file_range,
+         * so just use sendfile for in-kernel copy. This will fail for Linux
+         * versions from 2.6.0 to 2.6.32, because sendfile does not support
+         * writing to regular files.
+         */
+        if (sendfile_works && !lseek_works) {
+            error = 0;
+
+            if (!stat_in_acquired && fstat(fd_in, &stat_in) < 0) {
+                error = errno;
+            } else {
+                stat_in_acquired = 1;
+
+                /* For the sendfile call, the fd_in file offset must be
+                 * exactly equal to offset_out. Use lseek to ensure
+                 * correct state, in case an EINTR retry caused it to
+                 * get out of sync somehow.
+                 */
+                if (lseek(fd_in, offset_out, SEEK_SET) < 0) {
+                    error = errno;
+                } else {
+                    while (offset_out < stat_in.st_size) {
+                        copyfunc_ret = sendfile(fd_out,
+                                                fd_in,
+                                                &offset_out,
+                                                stat_in.st_size - offset_out);
+
+                        if (copyfunc_ret < 0) {
+                            error = errno;
+                            if (errno == EINVAL && !offset_out) {
+                                sendfile_works = 0;
+                            }
+                            break;
+                        } else if (copyfunc_ret == 0) {
+                            /* Unexpected EOF (the input file is shorter
+                             * than the size reported by fstat). Stop
+                             * here, since otherwise this loop would
+                             * never terminate.
+                             */
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        /* This implementation will work on any kernel. */
+        if (!sendfile_works) {
+            error = 0;
+
+            /* Despite its name, stat_out holds the fstat result of fd_in.
+             * Only its st_blksize field is used, as the buffer size.
+             */
+            if (!stat_out_acquired && fstat(fd_in, &stat_out) < 0) {
+                error = errno;
+            } else {
+                stat_out_acquired = 1;
+                if (buf == NULL)
+                    buf = malloc(stat_out.st_blksize);
+                if (buf == NULL) {
+                    error = errno;
+
+                /* For the read call, the fd_in file offset must be
+                 * exactly equal to offset_out plus the number of bytes
+                 * that are still buffered from a previous iteration of
+                 * the outer loop (those bytes have already been read
+                 * from fd_in, and must not be read again). Use lseek to
+                 * ensure correct state, in case an EINTR retry caused it
+                 * to get out of sync somehow.
+                 */
+                } else if (lseek(fd_in, offset_out + buf_bytes, SEEK_SET) < 0) {
+                    error = errno;
+                } else {
+                    while (1) {
+                        /* Some bytes may still be buffered from the
+                         * previous iteration of the outer loop.
+                         */
+                        if (!buf_bytes) {
+                            buf_offset = 0;
+                            buf_bytes = read(fd_in, buf, stat_out.st_blksize);
+
+                            if (!buf_bytes) {
+                                /* EOF */
+                                break;
+
+                            } else if (buf_bytes < 0) {
+                                error = errno;
+                                /* Nothing was buffered. Reset the count,
+                                 * so that an EINTR retry does not pass
+                                 * a negative length to write.
+                                 */
+                                buf_bytes = 0;
+                                break;
+                            }
+                        }
+
+                        copyfunc_ret = write(fd_out,
+                                             buf + buf_offset,
+                                             buf_bytes);
+
+                        if (copyfunc_ret < 0) {
+                            error = errno;
+                            break;
+                        }
+
+                        buf_bytes -= copyfunc_ret;
+                        buf_offset += copyfunc_ret;
+                        offset_out += copyfunc_ret;
+                    }
+                }
+            }
+        }
+
+        /* Set the final length of the output file. This is needed when
+         * the input file ends with a hole, and when the output file was
+         * previously longer than the copied data.
+         */
+        if (!error && ftruncate(fd_out, offset_out) < 0)
+            error = errno;
+
+        Py_END_ALLOW_THREADS
+
+        if (error != EINTR) {
+            /* Either success, or a failure that is not retried. */
+            eintr_retry = 0;
+        } else if (PyErr_CheckSignals() != 0) {
+            /* A Python signal handler raised an exception, which
+             * must be propagated instead of being replaced by OSError.
+             */
+            signal_raised = 1;
+            eintr_retry = 0;
+        }
+    }
+
+    if (buf != NULL)
+        free(buf);
+
+    if (signal_raised)
+        return NULL;
+
+    if (error) {
+        /* PyErr_SetFromErrno reads the global errno, which may have been
+         * modified since the failure was recorded, so restore it first.
+         */
+        errno = error;
+        return PyErr_SetFromErrno(PyExc_OSError);
+    }
+
+    /* off_t can be wider than int, so convert via long long ("L"). */
+    return Py_BuildValue("L", (long long)offset_out);
+}