blob: a82fc495685ac5091fcda5af055cfca343667ced [file] [log] [blame]
Reduced bspatch memory footprint via lazy reading of old file data.
--- bspatch.1
+++ bspatch.1
@@ -33,24 +33,42 @@
.Nd apply a patch built with bsdiff(1)
.Sh SYNOPSIS
.Nm
-.Ao Ar oldfile Ac Ao Ar newfile Ac Ao Ar patchfile Ac
+.Ar oldfile newfile patchfile
+.Op Ar old-extents new-extents
.Sh DESCRIPTION
.Nm
generates
-.Ao Ar newfile Ac
+.Ar newfile
from
-.Ao Ar oldfile Ac
+.Ar oldfile
and
-.Ao Ar patchfile Ac
+.Ar patchfile ,
where
-.Ao Ar patchfile Ac
-is a binary patch built by bsdiff(1).
+.Ar patchfile
+is a binary patch built by
+.Xr bsdiff 1 .
+.Pp
+When provided,
+.Ar old-extents
+and
+.Ar new-extents
+instruct
+.Nm
+to read specific chunks of data from the old file and to write to specific
+locations in the new file, respectively. Each is a comma-separated list of
+extents of the form
+.Ar offset : Ns Ar length ,
+where
+.Ar offset
+is either -1 or a non-negative integer and
+.Ar length
+is a positive integer. An offset value of -1 denotes a sparse extent, namely a
+sequence of zeros that entails neither reading nor writing of actual file
+content.
.Pp
.Nm
uses memory equal to the size of
-.Ao Ar oldfile Ac
-plus the size of
-.Ao Ar newfile Ac ,
+.Ar newfile ,
but can tolerate a very small working set without a dramatic loss
of performance.
.Sh SEE ALSO
--- bspatch.c
+++ bspatch.c
@@ -29,12 +29,17 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:
#endif
#include <bzlib.h>
-#include <stdlib.h>
+#include <err.h>
+#include <fcntl.h>
+#include <inttypes.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-#include <err.h>
#include <unistd.h>
-#include <fcntl.h>
+
+#include "exfile.h"
+#include "extents.h"
+
static off_t offtin(u_char *buf)
{
@@ -54,22 +59,43 @@ static off_t offtin(u_char *buf)
return y;
}
+/* Parses an extent string ex_str, returning a pointer to a newly allocated
+ * array of extents. The number of extents is stored in ex_count_p (if
+ * provided). Never returns NULL: if extents_parse() fails, the process exits
+ * through errx(). ex_count starts out as the sentinel (size_t)-1, so a value
+ * left untouched by extents_parse() is reported as a parsing error and any
+ * other value as an allocation error.
+ * NOTE(review): extents_parse() also stores a count (zero) and returns NULL
+ * for an input that yields no extents, so that case is reported with the
+ * allocation error message as well. */
+static ex_t *parse_extent_str(const char *ex_str, size_t *ex_count_p)
+{
+ size_t ex_count = (size_t)-1;
+ ex_t *ex_arr = extents_parse(ex_str, NULL, &ex_count);
+ if (!ex_arr)
+ errx(1, (ex_count == (size_t)-1 ?
+ "error parsing extents" :
+ "error allocating extent array"));
+ if (ex_count_p)
+ *ex_count_p = ex_count;
+ return ex_arr;
+}
+
+#define USAGE_TEMPLATE_STR \
+ "usage: %s oldfile newfile patchfile [old-extents new-extents]\n" \
+ "with extents taking the form \"off_1:len_1,...,off_n:len_n\"\n"
+
int main(int argc,char * argv[])
{
FILE * f, * cpf, * dpf, * epf;
BZFILE * cpfbz2, * dpfbz2, * epfbz2;
int cbz2err, dbz2err, ebz2err;
- int fd;
+ FILE *old_file = NULL, *new_file = NULL;
ssize_t oldsize,newsize;
ssize_t bzctrllen,bzdatalen;
u_char header[32],buf[8];
- u_char *old, *new;
+ u_char *new;
off_t oldpos,newpos;
off_t ctrl[3];
off_t lenread;
- off_t i;
+ off_t i, j;
- if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile\n",argv[0]);
+ if ((argc != 6) && (argc != 4)) errx(1, USAGE_TEMPLATE_STR, argv[0]);
+ int using_extents = (argc == 6);
/* Open patch file */
if ((f = fopen(argv[3], "r")) == NULL)
@@ -132,12 +158,21 @@ int main(int argc,char * argv[])
if ((epfbz2 = BZ2_bzReadOpen(&ebz2err, epf, 0, 0, NULL, 0)) == NULL)
errx(1, "BZ2_bzReadOpen, bz2err = %d", ebz2err);
- if(((fd=open(argv[1],O_RDONLY,0))<0) ||
- ((oldsize=lseek(fd,0,SEEK_END))==-1) ||
- ((old=malloc(oldsize+1))==NULL) ||
- (lseek(fd,0,SEEK_SET)!=0) ||
- (read(fd,old,oldsize)!=oldsize) ||
- (close(fd)==-1)) err(1,"%s",argv[1]);
+ /* Open input file for reading. */
+ if (using_extents) {
+ size_t ex_count = 0;
+ ex_t *ex_arr = parse_extent_str(argv[4], &ex_count);
+ old_file = exfile_fopen(argv[1], "r", ex_arr, ex_count, free);
+ } else {
+ old_file = fopen(argv[1], "r");
+ }
+ if (!old_file ||
+ fseek(old_file, 0, SEEK_END) != 0 ||
+ (oldsize = ftell(old_file)) < 0 ||
+ fseek(old_file, 0, SEEK_SET) != 0)
+ err(1, "cannot obtain the size of %s", argv[1]);
+ off_t old_file_pos = 0;
+
if((new=malloc(newsize+1))==NULL) err(1,NULL);
oldpos=0;newpos=0;
@@ -161,10 +196,25 @@ int main(int argc,char * argv[])
((dbz2err != BZ_OK) && (dbz2err != BZ_STREAM_END)))
errx(1, "Corrupt patch\n");
- /* Add old data to diff string */
- for(i=0;i<ctrl[0];i++)
- if((oldpos+i>=0) && (oldpos+i<oldsize))
- new[newpos+i]+=old[oldpos+i];
+ /* Add old data to diff string. It is enough to fseek once, at
+ * the beginning of the sequence, to avoid unnecessary
+ * overhead. */
+ j = newpos;
+ if ((i = oldpos) < 0) {
+ j -= i;
+ i = 0;
+ }
+ if (i != old_file_pos && fseek(old_file, i, SEEK_SET) < 0)
+ err(1, "error seeking input file to offset %" PRIdMAX,
+ (intmax_t)i);
+ if ((old_file_pos = oldpos + ctrl[0]) > oldsize)
+ old_file_pos = oldsize;
+ while (i++ < old_file_pos) {
+ u_char c;
+ if (fread_unlocked(&c, 1, 1, old_file) != 1)
+ err(1, "error reading from input file");
+ new[j++] += c;
+ }
/* Adjust pointers */
newpos+=ctrl[0];
@@ -185,6 +235,9 @@ int main(int argc,char * argv[])
oldpos+=ctrl[2];
};
+ /* Close input file. */
+ fclose(old_file);
+
/* Clean up the bzip2 reads */
BZ2_bzReadClose(&cbz2err, cpfbz2);
BZ2_bzReadClose(&dbz2err, dpfbz2);
@@ -193,12 +246,19 @@ int main(int argc,char * argv[])
err(1, "fclose(%s)", argv[3]);
/* Write the new file */
- if(((fd=open(argv[2],O_CREAT|O_TRUNC|O_WRONLY,0666))<0) ||
- (write(fd,new,newsize)!=newsize) || (close(fd)==-1))
+ if (using_extents) {
+ size_t ex_count = 0;
+ ex_t *ex_arr = parse_extent_str(argv[5], &ex_count);
+ new_file = exfile_fopen(argv[2], "w", ex_arr, ex_count, free);
+ } else {
+ new_file = fopen(argv[2], "w");
+ }
+ if (!new_file ||
+ fwrite_unlocked(new, 1, newsize, new_file) != newsize ||
+ fclose(new_file) == EOF)
err(1,"%s",argv[2]);
free(new);
- free(old);
return 0;
}
new file mode 100644
--- /dev/null
+++ exfile.c
@@ -0,0 +1,413 @@
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "exfile.h"
+
+/*
+ * Extent files implementation. Some things worth noting:
+ *
+ * - We are using glibc's buffered FILE objects for the underlying file I/O;
+ * this contributes to improved performance, especially with badly fragmented
+ * extents. However, the FILE handle we return to the caller is decidedly
+ * unbuffered: making it buffered too seems superfluous, causing excess data
+ * copying and memory use.
+ *
+ * - We maintain the "logical" file position separately from the "physical"
+ * (underlying) file position. The latter is updated lazily whenever actual
+ * file I/O is about to be performed.
+ *
+ * - The logical position of an extent file is internally represented by the
+ * current extent index (curr_ex_idx) and the position within the current
+ * extent (curr_ex_pos), as well as an absolute logical position (curr_pos).
+ * In general, curr_pos should equal the total length of all extents prior to
+ * curr_ex_idx, plus curr_ex_pos. Also, curr_ex_idx may range between 0 and
+ * the total extent count; if it is exactly the latter, then curr_ex_pos must
+ * be zero, representing the fact that we are at the logical end of the
+ * file. Otherwise, curr_ex_pos may range between 0 and the length of the
+ * current extent; if it is exactly the latter, then this is equivalent to
+ * position zero on the next extent. All functions should honor this
+ * duality.
+ *
+ * - Seeking is done efficiently at O(log(D)), where D is the
+ * number of extents between the current position and the new one. This seems
+ * like a good midway for supporting both sequential and random access.
+ */
+
+
+#define TRUE 1
+#define FALSE 0
+
+#define arraysize(x) (sizeof(x) / sizeof(*(x)))
+
+
+/* Extent prefix length. */
+typedef struct {
+ size_t prec; /* total length of preceding extents */
+ size_t total; /* total length including current extent */
+} prefix_len_t;
+
+/* Extent file logical modes. Used as index to the mapping from logical modes
+ * to open(2) and fopen(3) modes below. */
+typedef enum {
+ EXFILE_MODE_RO,
+ EXFILE_MODE_WO,
+ EXFILE_MODE_RW,
+ EXFILE_MODE_MAX /* sentinel */
+} exfile_mode_t;
+
+/* An extent file control object (aka "cookie"). */
+typedef struct {
+ int fd; /* underlying file descriptor */
+ size_t ex_count; /* number of extents (non-zero) */
+ ex_t *ex_arr; /* array of extents */
+ prefix_len_t *prefix_len_arr; /* total lengths of extent prefixes */
+ void (*ex_free)(void *); /* extent array free function */
+ size_t total_ex_len; /* total length of all extents (constant) */
+ off_t curr_file_pos; /* current underlying file position */
+ size_t curr_ex_idx; /* current extent index */
+ size_t curr_ex_pos; /* current position within extent */
+ off_t curr_pos; /* current logical file position */
+} exfile_t;
+
+
+/* Mapping from fopen(3) modes to extent file logical modes. */
+static const struct {
+ const char *fopen_mode;
+ exfile_mode_t mode;
+} fopen_mode_to_mode[] = {
+ {"r", EXFILE_MODE_RO},
+ {"r+", EXFILE_MODE_RW},
+ {"w", EXFILE_MODE_WO},
+ {"w+", EXFILE_MODE_RW},
+};
+
+
+/* Mapping from extent file logical modes to open(2) flags. */
+static const int mode_to_open_flags[EXFILE_MODE_MAX] = {
+ O_RDONLY,
+ O_WRONLY,
+ O_RDWR,
+};
+
+
+/* Searches an array |ex_arr| of |ex_count| extents and returns the index of
+ * the extent that contains the location |pos|. Uses an array |prefix_len_arr|
+ * of corresponding prefix lengths. The total complexity is O(log(D)), where D
+ * is the distance between the returned extent index and |init_ex_idx|. */
+static size_t
+ex_arr_search(size_t ex_count, const ex_t *ex_arr,
+ const prefix_len_t *prefix_len_arr, size_t pos,
+ size_t init_ex_idx)
+{
+ assert(ex_arr && ex_count);
+ const size_t last_ex_idx = ex_count - 1;
+ assert(init_ex_idx <= ex_count);
+ assert(pos < prefix_len_arr[last_ex_idx].total);
+ if (init_ex_idx == ex_count)
+ init_ex_idx = last_ex_idx; /* adjustment for purposes of the search below */
+
+ /* First, search in exponentially increasing leaps from the current extent,
+ * until an interval bounding the target position was obtained. Here i and j
+ * are the left and right (inclusive) index boundaries, respectively. */
+ ssize_t i = init_ex_idx;
+ ssize_t j = i;
+ size_t leap = 1;
+ /* Go left, as needed. */
+ while (i > 0 && pos < prefix_len_arr[i].prec) {
+ j = i - 1;
+ if ((i -= leap) < 0)
+ i = 0;
+ leap <<= 1;
+ }
+ /* Go right, as needed. */
+ while (j < last_ex_idx && pos >= prefix_len_arr[j].total) {
+ i = j + 1;
+ if ((j += leap) > last_ex_idx)
+ j = last_ex_idx;
+ leap <<= 1;
+ }
+
+ /* Then, perform a binary search between i and j. */
+ size_t k = 0;
+ while (1) {
+ k = (i + j) / 2;
+ if (pos < prefix_len_arr[k].prec)
+ j = k - 1;
+ else if (pos >= prefix_len_arr[k].total)
+ i = k + 1;
+ else
+ break;
+ }
+
+ return k;
+}
+
+/* Performs I/O operations (either read or write) on an extent file, advancing
+ * through consecutive extents and updating the logical/physical file position
+ * as we go. Transfers at most |size| bytes between |buf| and the file behind
+ * |xf|, reading if |do_read| is non-zero and writing otherwise. Sparse extents
+ * (negative offset) never touch the underlying file: reads fill |buf| with
+ * zeros and writes are silently dropped, yet both advance the logical
+ * position. Returns the number of bytes processed, which may be short if the
+ * end of the extents is reached or the underlying call transfers less than
+ * what remains of the current extent; returns 0 when already at the end of
+ * the extent file. If an error occurs before any byte was processed, returns
+ * -1 when reading and 0 when writing. */
+static ssize_t
+exfile_io(exfile_t *xf, int do_read, char *buf, size_t size)
+{
+ if (xf->curr_ex_idx == xf->ex_count)
+ return 0; /* end-of-extent-file */
+
+ /* Reading or writing? */
+ typedef ssize_t (io_func_t)(int, void *, size_t);
+ io_func_t *io_func;
+ ssize_t error_ret_val;
+ if (do_read) {
+ io_func = read;
+ error_ret_val = -1;
+ } else {
+ io_func = (io_func_t *)write;
+ error_ret_val = 0; /* must not return a negative value when writing */
+ }
+
+ /* Start processing data along extents. */
+ const ex_t *curr_ex = xf->ex_arr + xf->curr_ex_idx;
+ assert(curr_ex->len >= xf->curr_ex_pos);
+ size_t curr_ex_rem_len = curr_ex->len - xf->curr_ex_pos;
+ ssize_t total_bytes = 0;
+ while (size) {
+ /* Advance to the next extent of non-zero length. */
+ while (curr_ex_rem_len == 0) {
+ xf->curr_ex_idx++;
+ xf->curr_ex_pos = 0;
+ if (xf->curr_ex_idx == xf->ex_count)
+ return total_bytes; /* end-of-extent-file */
+ curr_ex++;
+ curr_ex_rem_len = curr_ex->len;
+ }
+
+ const int is_real_ex = (curr_ex->off >= 0);
+
+ /* Seek to the correct file position, as necessary. The physical position
+ * is cached in curr_file_pos, so lseek() is only issued when it differs
+ * from the position required by the current extent. */
+ if (is_real_ex) {
+ const off_t file_pos = curr_ex->off + xf->curr_ex_pos;
+ if (xf->curr_file_pos != file_pos) {
+ if (lseek(xf->fd, file_pos, SEEK_SET) == (off_t)-1) {
+ xf->curr_file_pos = -1; /* unknown file position */
+ return total_bytes ? total_bytes : error_ret_val;
+ }
+ xf->curr_file_pos = file_pos;
+ }
+ }
+
+ /* Process data to the end of the current extent or the requested
+ * count, whichever is smaller. For a sparse extent no system call is
+ * made and the whole count is treated as processed. */
+ size_t io_count = (size < curr_ex_rem_len ? size : curr_ex_rem_len);
+ ssize_t io_bytes = io_count;
+ if (is_real_ex)
+ io_bytes = io_func(xf->fd, buf, io_count);
+ else if (do_read)
+ memset(buf, 0, io_count);
+
+ /* Stop on error. */
+ if (io_bytes < 0) {
+ if (total_bytes == 0)
+ total_bytes = error_ret_val;
+ break;
+ }
+
+ /* Update the I/O state: byte count and logical/physical positions. */
+ total_bytes += io_bytes;
+ if (is_real_ex)
+ xf->curr_file_pos += io_bytes;
+ xf->curr_ex_pos += io_bytes;
+ xf->curr_pos += io_bytes;
+
+ /* If we didn't read the whole extent, finish; delegate handling of
+ * partial read/write back to the caller. */
+ if ((curr_ex_rem_len -= io_bytes) > 0)
+ break;
+
+ /* Update total count and buffer pointer. */
+ size -= io_bytes;
+ buf += io_bytes;
+ }
+
+ return total_bytes;
+}
+
+/* Reads up to |size| bytes from an extent file into |buf|. This implements the
+ * cookie_read_function_t interface and is a thin wrapper around exfile_io()
+ * (see above). Returns the number of bytes read, or -1 on error. */
+static ssize_t
+exfile_read(void *cookie, char *buf, size_t size)
+{
+ return exfile_io((exfile_t *)cookie, TRUE, buf, size);
+}
+
+/* Writes up to |size| bytes from |buf| to an extent file. This implements the
+ * cookie_write_function_t interface and is a thin wrapper around exfile_io()
+ * (see above). Returns the number of bytes written; must NOT return a negative
+ * value. */
+static ssize_t
+exfile_write(void *cookie, const char *buf, size_t size)
+{
+ return exfile_io((exfile_t *)cookie, FALSE, (char *)buf, size);
+}
+
+/* Performs seek on an extent file, repositioning it to the value of |*pos_p|
+ * according to |whence|. This implements the cookie_seek_function_t interface.
+ * On success, stores the resulting logical position measured in bytes along
+ * contiguous extents into |*pos_p| and returns 0; otherwise returns -1. A
+ * |whence| other than SEEK_CUR or SEEK_END leaves |*pos_p| unadjusted, i.e. it
+ * is taken as an absolute position. Only the logical position markers are
+ * updated here; no I/O is issued on the underlying descriptor. */
+static int
+exfile_seek(void *cookie, off64_t *pos_p, int whence)
+{
+ exfile_t *xf = (exfile_t *)cookie;
+
+ /* Compute the absolute logical target position. */
+ off64_t new_pos = *pos_p;
+ if (whence == SEEK_CUR)
+ new_pos += xf->curr_pos;
+ else if (whence == SEEK_END)
+ new_pos += xf->total_ex_len;
+
+ /* Ensure that the target position is valid. Note that repositioning the
+ * file right past the last extent is considered valid, in line with normal
+ * seek behavior, although no write (nor read) can be performed there. */
+ if (new_pos < 0 || new_pos > xf->total_ex_len)
+ return -1;
+
+ if (new_pos != (off64_t)xf->curr_pos) {
+ /* Find the extent that contains the requested logical position; handle
+ * the special cases (end of file, position zero) upfront, for
+ * efficiency. */
+ size_t new_ex_idx = 0;
+ if (new_pos == (off64_t)xf->total_ex_len)
+ new_ex_idx = xf->ex_count;
+ else if (new_pos)
+ new_ex_idx = ex_arr_search(xf->ex_count, xf->ex_arr,
+ xf->prefix_len_arr, new_pos,
+ xf->curr_ex_idx);
+
+ /* Set the logical position markers. */
+ xf->curr_ex_idx = new_ex_idx;
+ xf->curr_ex_pos =
+ (new_ex_idx < xf->ex_count ?
+ (size_t)(new_pos - xf->prefix_len_arr[new_ex_idx].prec) : 0);
+ xf->curr_pos = (off_t)new_pos;
+ }
+
+ *pos_p = new_pos;
+ return 0;
+}
+
+/* Closes an open extent file. This implements the cookie_close_function_t
+ * interface. Always returns 0 (success). */
+static int
+exfile_close(void *cookie)
+{
+ exfile_t *xf = (exfile_t *)cookie;
+ if (xf) {
+ if (xf->fd >= 0)
+ close(xf->fd);
+ free(xf->prefix_len_arr);
+ if (xf->ex_free)
+ xf->ex_free(xf->ex_arr);
+ free(xf);
+ }
+ return 0;
+}
+
+static const cookie_io_functions_t cookie_io_funcs = {
+ .read = exfile_read,
+ .write = exfile_write,
+ .seek = exfile_seek,
+ .close = exfile_close,
+};
+
+static FILE *
+exfile_open(int fd, const char *path, const char *fopen_mode, ex_t *ex_arr,
+ size_t ex_count, void (*ex_free)(void *))
+{
+ /* Argument sanity check: a non-empty extent array is required, and exactly
+ * one of a non-negative |fd| or a |path| must be supplied. */
+ if (!(ex_arr && ex_count && (fd >= 0 || path) && (fd < 0 || !path)))
+ return NULL;
+
+ /* Validate mode argument. */
+ exfile_mode_t mode = EXFILE_MODE_MAX;
+ int i;
+ for (i = 0; i < arraysize(fopen_mode_to_mode); i++)
+ if (!strcmp(fopen_mode_to_mode[i].fopen_mode, fopen_mode)) {
+ mode = fopen_mode_to_mode[i].mode;
+ break;
+ }
+ if (mode == EXFILE_MODE_MAX)
+ return NULL;
+
+ /* Open the underlying file, if not already provided. */
+ int do_close_fd = FALSE;
+ if (fd < 0) {
+ if ((fd = open(path, mode_to_open_flags[mode])) < 0)
+ return NULL;
+ do_close_fd = TRUE;
+ }
+
+ /* Allocate memory and open file streams, for both the underlying file and
+ * the handle returned to the caller. If any step fails, whatever was acquired
+ * so far is released, a descriptor opened above is closed (one supplied by
+ * the caller is left open), and NULL is returned. The control object is still
+ * zeroed at that point, so |ex_free| is not invoked on |ex_arr|. */
+ exfile_t *xf = NULL;
+ prefix_len_t *prefix_len_arr = NULL;
+ FILE *handle = NULL;
+ if (!((xf = (exfile_t *)calloc(1, sizeof(exfile_t))) &&
+ (prefix_len_arr =
+ (prefix_len_t *)malloc(sizeof(prefix_len_t) * ex_count)) &&
+ (handle = fopencookie(xf, fopen_mode, cookie_io_funcs)))) {
+ /* If a file was opened above, close it. */
+ if (do_close_fd)
+ close(fd);
+ if (xf)
+ xf->fd = -1; /* invalidate prior to calling exfile_close() */
+
+ free(prefix_len_arr);
+ if (handle)
+ fclose(handle); /* will call exfile_close already */
+ else
+ exfile_close(xf);
+ return NULL;
+ }
+
+ /* Compute the prefix lengths. */
+ size_t prefix_len = 0;
+ for (i = 0; i < ex_count; i++) {
+ prefix_len_arr[i].prec = prefix_len;
+ prefix_len += ex_arr[i].len;
+ prefix_len_arr[i].total = prefix_len;
+ }
+
+ /* Configure control object, including physical/logical file position. */
+ xf->fd = fd;
+ xf->ex_count = ex_count;
+ xf->ex_arr = ex_arr;
+ xf->prefix_len_arr = prefix_len_arr;
+ xf->ex_free = ex_free;
+ xf->total_ex_len = prefix_len_arr[ex_count - 1].total;
+ xf->curr_file_pos = lseek(fd, 0, SEEK_CUR);
+ xf->curr_ex_idx = 0;
+ xf->curr_ex_pos = 0;
+ xf->curr_pos = 0;
+
+ /* Return the external stream handle. */
+ return handle;
+}
+
+FILE *
+exfile_fopen(const char *path, const char *fopen_mode, ex_t *ex_arr,
+ size_t ex_count, void (*ex_free)(void *))
+{
+ return exfile_open(-1, path, fopen_mode, ex_arr, ex_count, ex_free);
+}
+
+FILE *
+exfile_fdopen(int fd, const char *fopen_mode, ex_t *ex_arr,
+ size_t ex_count, void (*ex_free)(void *))
+{
+ return exfile_open(fd, NULL, fopen_mode, ex_arr, ex_count, ex_free);
+}
new file mode 100644
--- /dev/null
+++ exfile.h
@@ -0,0 +1,49 @@
+#ifndef __EXFILE_H
+#define __EXFILE_H
+
+#include <stdio.h>
+
+/*
+ * Extent files.
+ *
+ * This module provides a familiar interface for handling files through an
+ * indirection layer of extents, which are contiguous chunks of variable length
+ * at arbitrary offsets within a file. Once an extent file handle is obtained,
+ * users may read, write and seek as they do with ordinary files, having the I/O
+ * with the underlying file done for them by the extent file implementation. The
+ * implementation supports "sparse extents", which are assumed to contain zeros
+ * but otherwise have no actual representation in the underlying file; these are
+ * denoted by negative offset values.
+ *
+ * Unlike ordinary files, the size of an extent file is fixed; it is not
+ * truncated on open, nor is writing past the extent span allowed. Also, writing
+ * to a sparse extent has no effect and will not raise an error.
+ */
+
+
+/* An extent, defined by an offset and a length. */
+typedef struct {
+ off_t off; /* the extent offset; negative indicates a sparse extent */
+ size_t len; /* the extent length */
+} ex_t;
+
+
+/* Opens a file |path| with use mode |fopen_mode| for use with an array of
+ * extents |ex_arr| of length |ex_count|. The mode string can be either of "r"
+ * (read-only), "w" (write-only) or "r+"/"w+" (read-write); the underlying file
+ * is neither created (if not present) nor truncated (if present) when opened
+ * for writing. The function |ex_free|, if not NULL, will be called to
+ * deallocate the extent array once the file object is closed. Returns a FILE
+ * pointer that can be used with ordinary stream functions (e.g. fread), or
+ * NULL if opening the file has failed. */
+FILE *exfile_fopen(const char *path, const char *fopen_mode, ex_t *ex_arr,
+ size_t ex_count, void (*ex_free)(void *));
+
+/* Associates an extent file stream with an already open file descriptor |fd|.
+ * The |fopen_mode| argument is as described above and must be compatible with
+ * the mode of |fd|. All other arguments, behaviors and return values are as
+ * those of exfile_fopen (see above). */
+FILE *exfile_fdopen(int fd, const char *fopen_mode, ex_t *ex_arr,
+ size_t ex_count, void (*ex_free)(void *));
+
+#endif /* __EXFILE_H */
new file mode 100644
--- /dev/null
+++ extents.c
@@ -0,0 +1,126 @@
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "extents.h"
+
+
+#define TRUE 1
+#define FALSE 0
+
+/* Minimum/maximum values for arbitrary integer types. */
+#define UNSIGNED_INT_MAX(t) (~((t)0))
+#define SIGNED_INT_MAX(t) ((t)((uintmax_t)UNSIGNED_INT_MAX(t) >> 1))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#define INT_TYPE_MAX(t) MAX(UNSIGNED_INT_MAX(t), SIGNED_INT_MAX(t))
+
+/* The maximum accepted value for a given integer type when parsed as a signed
+ * long long integer. This is defined to be the smaller of the maximum value
+ * that can be represented by this type and LLONG_MAX. This bound allows us to
+ * properly check that parsed values do not exceed the capacity of their
+ * intended store, regardless of how its size relates to that of a signed long
+ * long integer. Note: this may mean that we are losing the most significant
+ * bit of an unsigned 64-bit field (e.g. size_t on some platforms), however
+ * still permitting values up to 2^63 - 1, which is more than enough for all
+ * practical purposes. */
+#define LLONG_MAX_BOUND(s) \
+ ((uintmax_t)(s) < (uintmax_t)LLONG_MAX ? (long long)(s) : LLONG_MAX)
+#define MAX_ALLOWED(t) LLONG_MAX_BOUND(INT_TYPE_MAX(t))
+
+/* Get the type of a struct field. */
+#define FIELD_TYPE(t, f) typeof(((t *)0)->f)
+
+
+/* Reads a long long integer from |s| into |*val_p|. Returns a pointer to the
+ * character immediately following the specified |delim| (or to the string
+ * terminator, if the string ends right after the number and |may_end| is
+ * true), unless (a) parsing failed (overflow or no valid digits); (b) the read
+ * value is less than |min_val| or greater than |max_val|; (c) the delimiter
+ * character is not |delim|, or the string ends although |may_end| is false. In
+ * any of these cases, returns NULL and leaves |*val_p| unmodified. */
+const char *
+read_llong(const char *s, long long *val_p, long long min_val,
+ long long max_val, char delim, int may_end)
+{
+ assert(val_p);
+ const char *next_s;
+ errno = 0;
+ long long val = strtoll(s, (char **)&next_s, 10);
+ if (((val == LLONG_MAX || val == LLONG_MIN) && errno == ERANGE) ||
+ next_s == s || val < min_val || val > max_val ||
+ (*next_s ? *next_s != delim : !may_end))
+ return NULL; /* bad value or delimiter */
+ *val_p = val;
+ if (*next_s)
+ next_s++; /* skip delimiter */
+ return next_s;
+}
+
+
+/* Reads a comma-separated list of "offset:length" extents from |ex_str|. If
+ * |ex_arr| is NULL, then |ex_count| is ignored and it attempts to parse valid
+ * extents until the end of the string is reached. Otherwise, stores up to
+ * |ex_count| extents into |ex_arr|, which must be of at least this size.
+ * Returns the number of correctly parsed extents, or -1 if a malformed extent
+ * was found. */
+static ssize_t
+extents_read(const char *ex_str, ex_t *ex_arr, size_t ex_count)
+{
+ size_t i;
+ size_t last_i = ex_count - 1;
+ if (!ex_arr) {
+ ex_count = SIZE_MAX;
+ last_i = 0;
+ }
+ for (i = 0; *ex_str && i < ex_count; i++) {
+ long long raw_off = 0, raw_len = 0;
+ if (!((ex_str = read_llong(ex_str, &raw_off, -1,
+ MAX_ALLOWED(FIELD_TYPE(ex_t, off)),
+ ':', FALSE)) &&
+ (ex_str = read_llong(ex_str, &raw_len, 1,
+ MAX_ALLOWED(FIELD_TYPE(ex_t, len)),
+ ',', i >= last_i))))
+ return -1; /* parsing error */
+ if (ex_arr) {
+ ex_arr[i].off = raw_off;
+ ex_arr[i].len = raw_len;
+ }
+ }
+ return i;
+}
+
+
+ex_t *
+extents_parse(const char *ex_str, ex_t *ex_arr, size_t *ex_count_p)
+{
+ /* Sanity checks: a string must be provided; if an array is provided, an
+ * array count must be given as well. */
+ if (!ex_str || (ex_arr && !ex_count_p))
+ return NULL;
+
+ /* Parse string and count extents. */
+ ssize_t ret = extents_read(ex_str, NULL, 0);
+ if (ret < 0)
+ return NULL; /* parsing error */
+ size_t ex_count = (size_t)ret;
+
+ /* Input is good, commit to extent count. */
+ if (ex_count_p) {
+ size_t alloc_ex_count = *ex_count_p;
+ *ex_count_p = ex_count;
+ if (ex_arr && alloc_ex_count < ex_count)
+ return NULL; /* insufficient allocated space */
+ }
+ if (ex_count == 0)
+ return NULL; /* no extents, nothing to do */
+
+ /* Allocate extent array, if needed. */
+ if (!(ex_arr || (ex_arr = (ex_t *)malloc(sizeof(ex_t) * ex_count))))
+ return NULL; /* allocation failed */
+
+ /* Populate the extent array. */
+ extents_read(ex_str, ex_arr, ex_count);
+
+ return ex_arr;
+}
new file mode 100644
--- /dev/null
+++ extents.h
@@ -0,0 +1,22 @@
+#ifndef __EXTENTS_H
+#define __EXTENTS_H
+
+#include "exfile.h"
+
+
+/* Parses a string representation |ex_str| and populates an array |ex_arr|
+ * consisting of |*ex_count_p| extents. The string is expected to be a
+ * comma-separated list of pairs of the form "offset:length". An offset may be
+ * -1 or a non-negative integer; the former indicates a sparse extent
+ * (consisting of zeros). A length is a positive integer. If |ex_arr| is NULL,
+ * |*ex_count_p| is ignored and a new array is allocated based on the actual
+ * number of extents parsed. Upon success, returns a pointer to the populated
+ * array of extents and stores the actual number of extents at the location
+ * pointed to by |ex_count_p| (if provided). If the string parses correctly but
+ * the operation otherwise fails (allocation error, array too small), returns
+ * NULL but still stores the number of parsed extents. Otherwise, returns NULL
+ * and does not store anything. If a new array was allocated, then it should be
+ * deallocated with free(3). */
+ex_t *extents_parse(const char *ex_str, ex_t *ex_arr, size_t *ex_count_p);
+
+#endif /* __EXTENTS_H */