| Reduced bspatch memory footprint via lazy reading of old file data. |
| --- bspatch.1 |
| +++ bspatch.1 |
| @@ -33,24 +33,42 @@ |
| .Nd apply a patch built with bsdiff(1) |
| .Sh SYNOPSIS |
| .Nm |
| -.Ao Ar oldfile Ac Ao Ar newfile Ac Ao Ar patchfile Ac |
| +.Ar oldfile newfile patchfile |
| +.Op Ar old-extents new-extents |
| .Sh DESCRIPTION |
| .Nm |
| generates |
| -.Ao Ar newfile Ac |
| +.Ar newfile |
| from |
| -.Ao Ar oldfile Ac |
| +.Ar oldfile |
| and |
| -.Ao Ar patchfile Ac |
| +.Ar patchfile , |
| where |
| -.Ao Ar patchfile Ac |
| -is a binary patch built by bsdiff(1). |
| +.Ar patchfile |
| +is a binary patch built by |
| +.Xr bsdiff 1 . |
| +.Pp |
| +When provided, |
| +.Ar old-extents |
| +and |
| +.Ar new-extents |
| +instruct |
| +.Nm |
| +to read specific chunks of data from the old file and to write to specific |
| +locations in the new file, respectively. Each is a comma-separated list of |
| +extents of the form |
| +.Ar offset : Ns Ar length , |
| +where |
| +.Ar offset |
| +is either -1 or a non-negative integer and |
| +.Ar length |
| +is a positive integer. An offset value of -1 denotes a sparse extent, namely a |
| +sequence of zeros that entails neither reading nor writing of actual file |
| +content. |
| .Pp |
| .Nm |
| uses memory equal to the size of |
| -.Ao Ar oldfile Ac |
| -plus the size of |
| -.Ao Ar newfile Ac , |
| +.Ar newfile , |
| but can tolerate a very small working set without a dramatic loss |
| of performance. |
| .Sh SEE ALSO |
| --- bspatch.c |
| +++ bspatch.c |
| @@ -29,12 +29,17 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59: |
| #endif |
| |
| #include <bzlib.h> |
| -#include <stdlib.h> |
| +#include <err.h> |
| +#include <fcntl.h> |
| +#include <inttypes.h> |
| #include <stdio.h> |
| +#include <stdlib.h> |
| #include <string.h> |
| -#include <err.h> |
| #include <unistd.h> |
| -#include <fcntl.h> |
| + |
| +#include "exfile.h" |
| +#include "extents.h" |
| + |
| |
| static off_t offtin(u_char *buf) |
| { |
| @@ -54,22 +59,43 @@ static off_t offtin(u_char *buf) |
| return y; |
| } |
| |
| +/* Parses an extent string ex_str, returning a pointer to a newly allocated |
| + * array of extents. The number of extents is stored in ex_count_p (if |
| + * provided). */ |
| +static ex_t *parse_extent_str(const char *ex_str, size_t *ex_count_p) |
| +{ |
| + size_t ex_count = (size_t)-1; |
| + ex_t *ex_arr = extents_parse(ex_str, NULL, &ex_count); |
| + if (!ex_arr) |
| + errx(1, (ex_count == (size_t)-1 ? |
| + "error parsing extents" : |
| + "error allocating extent array")); |
| + if (ex_count_p) |
| + *ex_count_p = ex_count; |
| + return ex_arr; |
| +} |
| + |
| +#define USAGE_TEMPLATE_STR \ |
| + "usage: %s oldfile newfile patchfile [old-extents new-extents]\n" \ |
| + "with extents taking the form \"off_1:len_1,...,off_n:len_n\"\n" |
| + |
| int main(int argc,char * argv[]) |
| { |
| FILE * f, * cpf, * dpf, * epf; |
| BZFILE * cpfbz2, * dpfbz2, * epfbz2; |
| int cbz2err, dbz2err, ebz2err; |
| - int fd; |
| + FILE *old_file = NULL, *new_file = NULL; |
| ssize_t oldsize,newsize; |
| ssize_t bzctrllen,bzdatalen; |
| u_char header[32],buf[8]; |
| - u_char *old, *new; |
| + u_char *new; |
| off_t oldpos,newpos; |
| off_t ctrl[3]; |
| off_t lenread; |
| - off_t i; |
| + off_t i, j; |
| |
| - if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile\n",argv[0]); |
| + if ((argc != 6) && (argc != 4)) errx(1, USAGE_TEMPLATE_STR, argv[0]); |
| + int using_extents = (argc == 6); |
| |
| /* Open patch file */ |
| if ((f = fopen(argv[3], "r")) == NULL) |
| @@ -132,12 +158,21 @@ int main(int argc,char * argv[]) |
| if ((epfbz2 = BZ2_bzReadOpen(&ebz2err, epf, 0, 0, NULL, 0)) == NULL) |
| errx(1, "BZ2_bzReadOpen, bz2err = %d", ebz2err); |
| |
| - if(((fd=open(argv[1],O_RDONLY,0))<0) || |
| - ((oldsize=lseek(fd,0,SEEK_END))==-1) || |
| - ((old=malloc(oldsize+1))==NULL) || |
| - (lseek(fd,0,SEEK_SET)!=0) || |
| - (read(fd,old,oldsize)!=oldsize) || |
| - (close(fd)==-1)) err(1,"%s",argv[1]); |
| + /* Open input file for reading. */ |
| + if (using_extents) { |
| + size_t ex_count = 0; |
| + ex_t *ex_arr = parse_extent_str(argv[4], &ex_count); |
| + old_file = exfile_fopen(argv[1], "r", ex_arr, ex_count, free); |
| + } else { |
| + old_file = fopen(argv[1], "r"); |
| + } |
| + if (!old_file || |
| + fseek(old_file, 0, SEEK_END) != 0 || |
| + (oldsize = ftell(old_file)) < 0 || |
| + fseek(old_file, 0, SEEK_SET) != 0) |
| + err(1, "cannot obtain the size of %s", argv[1]); |
| + off_t old_file_pos = 0; |
| + |
| if((new=malloc(newsize+1))==NULL) err(1,NULL); |
| |
| oldpos=0;newpos=0; |
| @@ -161,10 +196,25 @@ int main(int argc,char * argv[]) |
| ((dbz2err != BZ_OK) && (dbz2err != BZ_STREAM_END))) |
| errx(1, "Corrupt patch\n"); |
| |
| - /* Add old data to diff string */ |
| - for(i=0;i<ctrl[0];i++) |
| - if((oldpos+i>=0) && (oldpos+i<oldsize)) |
| - new[newpos+i]+=old[oldpos+i]; |
| + /* Add old data to diff string. It is enough to fseek once, at |
| + * the beginning of the sequence, to avoid unnecessary |
| + * overhead. */ |
| + j = newpos; |
| + if ((i = oldpos) < 0) { |
| + j -= i; |
| + i = 0; |
| + } |
| + if (i != old_file_pos && fseek(old_file, i, SEEK_SET) < 0) |
| + err(1, "error seeking input file to offset %" PRIdMAX, |
| + (intmax_t)i); |
| + if ((old_file_pos = oldpos + ctrl[0]) > oldsize) |
| + old_file_pos = oldsize; |
| + while (i++ < old_file_pos) { |
| + u_char c; |
| + if (fread_unlocked(&c, 1, 1, old_file) != 1) |
| + err(1, "error reading from input file"); |
| + new[j++] += c; |
| + } |
| |
| /* Adjust pointers */ |
| newpos+=ctrl[0]; |
| @@ -185,6 +235,9 @@ int main(int argc,char * argv[]) |
| oldpos+=ctrl[2]; |
| }; |
| |
| + /* Close input file. */ |
| + fclose(old_file); |
| + |
| /* Clean up the bzip2 reads */ |
| BZ2_bzReadClose(&cbz2err, cpfbz2); |
| BZ2_bzReadClose(&dbz2err, dpfbz2); |
| @@ -193,12 +246,19 @@ int main(int argc,char * argv[]) |
| err(1, "fclose(%s)", argv[3]); |
| |
| /* Write the new file */ |
| - if(((fd=open(argv[2],O_CREAT|O_TRUNC|O_WRONLY,0666))<0) || |
| - (write(fd,new,newsize)!=newsize) || (close(fd)==-1)) |
| + if (using_extents) { |
| + size_t ex_count = 0; |
| + ex_t *ex_arr = parse_extent_str(argv[5], &ex_count); |
| + new_file = exfile_fopen(argv[2], "w", ex_arr, ex_count, free); |
| + } else { |
| + new_file = fopen(argv[2], "w"); |
| + } |
| + if (!new_file || |
| + fwrite_unlocked(new, 1, newsize, new_file) != newsize || |
| + fclose(new_file) == EOF) |
| err(1,"%s",argv[2]); |
| |
| free(new); |
| - free(old); |
| |
| return 0; |
| } |
| new file mode 100644 |
| --- /dev/null |
| +++ exfile.c |
| @@ -0,0 +1,413 @@ |
| +#define _GNU_SOURCE |
| +#include <assert.h> |
| +#include <stdio.h> |
| +#include <stdlib.h> |
| +#include <fcntl.h> |
| +#include <string.h> |
| +#include <sys/stat.h> |
| +#include <sys/types.h> |
| +#include <unistd.h> |
| + |
| +#include "exfile.h" |
| + |
| +/* |
| + * Extent files implementation. Some things worth noting: |
| + * |
| + * - We are using glibc's buffered FILE objects for the underlying file I/O; |
| + * this contributes to improved performance, especially with badly fragmented |
| + * extents. However, the FILE handle we return to the caller is decidedly |
| + * unbuffered: making it buffered too seems superfluous, causing excess data |
| + * copying and memory use. |
| + * |
| + * - We maintain the "logical" file position separately from the "physical" |
| + * (underlying) file position. The latter is updated lazily whenever actual |
| + * file I/O is about to be performed. |
| + * |
| + * - The logical position of an extent file is internally represented by the |
| + * current extent index (curr_ex_idx) and the position within the current |
| + * extent (curr_ex_pos), as well as an absolute logical position (curr_pos). |
| + * In general, curr_pos should equal the total length of all extents prior to |
| + * curr_ex_idx, plus curr_ex_pos. Also, curr_ex_idx may range between 0 and |
| + * the total extent count; if it is exactly the latter, then curr_ex_pos must |
| + * be zero, representing the fact that the we are at the logical end of the |
| + * file. Otherwise, curr_ex_pos may range between 0 and the length of the |
| + * current extent; if it is exactly the latter, then this is equivalent to |
| + * position zero on the next extent. All functions should honor this |
| + * duality. |
| + * |
| + * - Seeking is done efficiently at O(log(D)), where D is the |
| + * number of extents between the current position and the new one. This seems |
| + * like a good midway for supporting both sequential and random access. |
| + */ |
| + |
| + |
| +#define TRUE 1 |
| +#define FALSE 0 |
| + |
| +#define arraysize(x) (sizeof(x) / sizeof(*(x))) |
| + |
| + |
| +/* Extent prefix length. */ |
| +typedef struct { |
| + size_t prec; /* total length of preceding extents */ |
| + size_t total; /* total length including current extent */ |
| +} prefix_len_t; |
| + |
| +/* Extent file logical modes. Used as index to the mapping from logical modes |
| + * to open(2) and fopen(3) modes below. */ |
| +typedef enum { |
| + EXFILE_MODE_RO, |
| + EXFILE_MODE_WO, |
| + EXFILE_MODE_RW, |
| + EXFILE_MODE_MAX /* sentinel */ |
| +} exfile_mode_t; |
| + |
| +/* An extent file control object (aka "cookie"). */ |
| +typedef struct { |
| + int fd; /* underlying file descriptor */ |
| + size_t ex_count; /* number of extents (non-zero) */ |
| + ex_t *ex_arr; /* array of extents */ |
| + prefix_len_t *prefix_len_arr; /* total lengths of extent prefixes */ |
| + void (*ex_free)(void *); /* extent array free function */ |
| + size_t total_ex_len; /* total length of all extents (constant) */ |
| + off_t curr_file_pos; /* current underlying file position */ |
| + size_t curr_ex_idx; /* current extent index */ |
| + size_t curr_ex_pos; /* current position within extent */ |
| + off_t curr_pos; /* current logical file position */ |
| +} exfile_t; |
| + |
| + |
| +/* Mapping from fopen(3) modes to extent file logical modes. */ |
| +static const struct { |
| + const char *fopen_mode; |
| + exfile_mode_t mode; |
| +} fopen_mode_to_mode[] = { |
| + {"r", EXFILE_MODE_RO}, |
| + {"r+", EXFILE_MODE_RW}, |
| + {"w", EXFILE_MODE_WO}, |
| + {"w+", EXFILE_MODE_RW}, |
| +}; |
| + |
| + |
| +/* Mapping from extent file logical modes to open(2) flags. */ |
| +static const int mode_to_open_flags[EXFILE_MODE_MAX] = { |
| + O_RDONLY, |
| + O_WRONLY, |
| + O_RDWR, |
| +}; |
| + |
| + |
| +/* Searches an array |ex_arr| of |ex_count| extents and returns the index of |
| + * the extent that contains the location |pos|. Uses an array |prefix_len_arr| |
| + * of corresponding prefix lengths. The total complexity is O(log(D)), where D |
| + * is the distance between the returned extent index and |init_ex_idx|. */ |
| +static size_t |
| +ex_arr_search(size_t ex_count, const ex_t *ex_arr, |
| + const prefix_len_t *prefix_len_arr, size_t pos, |
| + size_t init_ex_idx) |
| +{ |
| + assert(ex_arr && ex_count); |
| + const size_t last_ex_idx = ex_count - 1; |
| + assert(init_ex_idx <= ex_count); |
| + assert(pos < prefix_len_arr[last_ex_idx].total); |
| + if (init_ex_idx == ex_count) |
| + init_ex_idx = last_ex_idx; /* adjustment for purposes of the search below */ |
| + |
| + /* First, search in exponentially increasing leaps from the current extent, |
| + * until an interval bounding the target position was obtained. Here i and j |
| + * are the left and right (inclusive) index boundaries, respectively. */ |
| + ssize_t i = init_ex_idx; |
| + ssize_t j = i; |
| + size_t leap = 1; |
| + /* Go left, as needed. */ |
| + while (i > 0 && pos < prefix_len_arr[i].prec) { |
| + j = i - 1; |
| + if ((i -= leap) < 0) |
| + i = 0; |
| + leap <<= 1; |
| + } |
| + /* Go right, as needed. */ |
| + while (j < last_ex_idx && pos >= prefix_len_arr[j].total) { |
| + i = j + 1; |
| + if ((j += leap) > last_ex_idx) |
| + j = last_ex_idx; |
| + leap <<= 1; |
| + } |
| + |
| + /* Then, perform a binary search between i and j. */ |
| + size_t k = 0; |
| + while (1) { |
| + k = (i + j) / 2; |
| + if (pos < prefix_len_arr[k].prec) |
| + j = k - 1; |
| + else if (pos >= prefix_len_arr[k].total) |
| + i = k + 1; |
| + else |
| + break; |
| + } |
| + |
| + return k; |
| +} |
| + |
| +/* Performs I/O operations (either read or write) on an extent file, advancing |
| + * through consecutive extents and updating the logical/physical file position |
| + * as we go. */ |
| +static ssize_t |
| +exfile_io(exfile_t *xf, int do_read, char *buf, size_t size) |
| +{ |
| + if (xf->curr_ex_idx == xf->ex_count) |
| + return 0; /* end-of-extent-file */ |
| + |
| + /* Reading or writing? */ |
| + typedef ssize_t (io_func_t)(int, void *, size_t); |
| + io_func_t *io_func; |
| + ssize_t error_ret_val; |
| + if (do_read) { |
| + io_func = read; |
| + error_ret_val = -1; |
| + } else { |
| + io_func = (io_func_t *)write; |
| + error_ret_val = 0; /* must not return a negative value when writing */ |
| + } |
| + |
| + /* Start processing data along extents. */ |
| + const ex_t *curr_ex = xf->ex_arr + xf->curr_ex_idx; |
| + assert(curr_ex->len >= xf->curr_ex_pos); |
| + size_t curr_ex_rem_len = curr_ex->len - xf->curr_ex_pos; |
| + ssize_t total_bytes = 0; |
| + while (size) { |
| + /* Advance to the next extent of non-zero length. */ |
| + while (curr_ex_rem_len == 0) { |
| + xf->curr_ex_idx++; |
| + xf->curr_ex_pos = 0; |
| + if (xf->curr_ex_idx == xf->ex_count) |
| + return total_bytes; /* end-of-extent-file */ |
| + curr_ex++; |
| + curr_ex_rem_len = curr_ex->len; |
| + } |
| + |
| + const int is_real_ex = (curr_ex->off >= 0); |
| + |
| + /* Seek to the correct file position, as necessary. */ |
| + if (is_real_ex) { |
| + const off_t file_pos = curr_ex->off + xf->curr_ex_pos; |
| + if (xf->curr_file_pos != file_pos) { |
| + if (lseek(xf->fd, file_pos, SEEK_SET) == (off_t)-1) { |
| + xf->curr_file_pos = -1; /* unknown file position */ |
| + return total_bytes ? total_bytes : error_ret_val; |
| + } |
| + xf->curr_file_pos = file_pos; |
| + } |
| + } |
| + |
| + /* Process data to the end of the current extent or the requested |
| + * count, whichever is smaller. */ |
| + size_t io_count = (size < curr_ex_rem_len ? size : curr_ex_rem_len); |
| + ssize_t io_bytes = io_count; |
| + if (is_real_ex) |
| + io_bytes = io_func(xf->fd, buf, io_count); |
| + else if (do_read) |
| + memset(buf, 0, io_count); |
| + |
| + /* Stop on error. */ |
| + if (io_bytes < 0) { |
| + if (total_bytes == 0) |
| + total_bytes = error_ret_val; |
| + break; |
| + } |
| + |
| + /* Update read state. */ |
| + total_bytes += io_bytes; |
| + if (is_real_ex) |
| + xf->curr_file_pos += io_bytes; |
| + xf->curr_ex_pos += io_bytes; |
| + xf->curr_pos += io_bytes; |
| + |
| + /* If we didn't read the whole extent, finish; delegate handling of |
| + * partial read/write back to the caller. */ |
| + if ((curr_ex_rem_len -= io_bytes) > 0) |
| + break; |
| + |
| + /* Update total count and buffer pointer. */ |
| + size -= io_bytes; |
| + buf += io_bytes; |
| + } |
| + |
| + return total_bytes; |
| +} |
| + |
| +/* Reads up to |size| bytes from an extent file into |buf|. This implements the |
| + * cookie_read_function_t interface and is a thin wrapper around exfile_io() |
| + * (see above). Returns the number of bytes read, or -1 on error. */ |
| +static ssize_t |
| +exfile_read(void *cookie, char *buf, size_t size) |
| +{ |
| + return exfile_io((exfile_t *)cookie, TRUE, buf, size); |
| +} |
| + |
| +/* Writes up to |size| bytes from |buf| to an extent file. This implements the |
| + * cookie_write_function_t interface and is a thin wrapper around exfile_io() |
| + * (see above). Returns the number of bytes written; must NOT return a negative |
| + * value. */ |
| +static ssize_t |
| +exfile_write(void *cookie, const char *buf, size_t size) |
| +{ |
| + return exfile_io((exfile_t *)cookie, FALSE, (char *)buf, size); |
| +} |
| + |
| +/* Performs seek on an extent file, repositioning it to the value of |*pos_p| |
| + * according to |whence|. This implements the cookie_seek_function_t interface. |
| + * On success, stores the resulting logical position measured in bytes along |
| + * contiguous extents into |*pos_p| and returns 0; otherwise returns -1. */ |
| +static int |
| +exfile_seek(void *cookie, off64_t *pos_p, int whence) |
| +{ |
| + exfile_t *xf = (exfile_t *)cookie; |
| + |
| + /* Compute the absolute logical target position. */ |
| + off64_t new_pos = *pos_p; |
| + if (whence == SEEK_CUR) |
| + new_pos += xf->curr_pos; |
| + else if (whence == SEEK_END) |
| + new_pos += xf->total_ex_len; |
| + |
| + /* Ensure that the target position is valid. Note that repositioning the |
| + * file right past the last extent is considered valid, in line with normal |
| + * seek behavior, although no write (nor read) can be performed there. */ |
| + if (new_pos < 0 || new_pos > xf->total_ex_len) |
| + return -1; |
| + |
| + if (new_pos != (off64_t)xf->curr_pos) { |
| + /* Find the extend that contains the requested logical position; handle |
| + * special case upfront, for efficiency. */ |
| + size_t new_ex_idx = 0; |
| + if (new_pos == (off64_t)xf->total_ex_len) |
| + new_ex_idx = xf->ex_count; |
| + else if (new_pos) |
| + new_ex_idx = ex_arr_search(xf->ex_count, xf->ex_arr, |
| + xf->prefix_len_arr, new_pos, |
| + xf->curr_ex_idx); |
| + |
| + /* Set the logical position markers. */ |
| + xf->curr_ex_idx = new_ex_idx; |
| + xf->curr_ex_pos = |
| + (new_ex_idx < xf->ex_count ? |
| + (size_t)(new_pos - xf->prefix_len_arr[new_ex_idx].prec) : 0); |
| + xf->curr_pos = (off_t)new_pos; |
| + } |
| + |
| + *pos_p = new_pos; |
| + return 0; |
| +} |
| + |
| +/* Closes an open extent file. This implements the cookie_close_function_t |
| + * interface. Always returns 0 (success). */ |
| +static int |
| +exfile_close(void *cookie) |
| +{ |
| + exfile_t *xf = (exfile_t *)cookie; |
| + if (xf) { |
| + if (xf->fd >= 0) |
| + close(xf->fd); |
| + free(xf->prefix_len_arr); |
| + if (xf->ex_free) |
| + xf->ex_free(xf->ex_arr); |
| + free(xf); |
| + } |
| + return 0; |
| +} |
| + |
| +static const cookie_io_functions_t cookie_io_funcs = { |
| + .read = exfile_read, |
| + .write = exfile_write, |
| + .seek = exfile_seek, |
| + .close = exfile_close, |
| +}; |
| + |
| +static FILE * |
| +exfile_open(int fd, const char *path, const char *fopen_mode, ex_t *ex_arr, |
| + size_t ex_count, void (*ex_free)(void *)) |
| +{ |
| + /* Argument sanity check. */ |
| + if (!(ex_arr && ex_count && (fd >= 0 || path) && (fd < 0 || !path))) |
| + return NULL; |
| + |
| + /* Validate mode argument. */ |
| + exfile_mode_t mode = EXFILE_MODE_MAX; |
| + int i; |
| + for (i = 0; i < arraysize(fopen_mode_to_mode); i++) |
| + if (!strcmp(fopen_mode_to_mode[i].fopen_mode, fopen_mode)) { |
| + mode = fopen_mode_to_mode[i].mode; |
| + break; |
| + } |
| + if (mode == EXFILE_MODE_MAX) |
| + return NULL; |
| + |
| + /* Open the underlying file, if not already provided. */ |
| + int do_close_fd = FALSE; |
| + if (fd < 0) { |
| + if ((fd = open(path, mode_to_open_flags[mode])) < 0) |
| + return NULL; |
| + do_close_fd = TRUE; |
| + } |
| + |
| + /* Allocate memory and open file streams, for both the underlying file and |
| + * the handle returned to the caller. */ |
| + exfile_t *xf = NULL; |
| + prefix_len_t *prefix_len_arr = NULL; |
| + FILE *handle = NULL; |
| + if (!((xf = (exfile_t *)calloc(1, sizeof(exfile_t))) && |
| + (prefix_len_arr = |
| + (prefix_len_t *)malloc(sizeof(prefix_len_t) * ex_count)) && |
| + (handle = fopencookie(xf, fopen_mode, cookie_io_funcs)))) { |
| + /* If a file was opened above, close it. */ |
| + if (do_close_fd) |
| + close(fd); |
| + if (xf) |
| + xf->fd = -1; /* invalidate prior to calling exfile_close() */ |
| + |
| + free(prefix_len_arr); |
| + if (handle) |
| + fclose(handle); /* will call exfile_close already */ |
| + else |
| + exfile_close(xf); |
| + return NULL; |
| + } |
| + |
| + /* Compute the prefix lengths. */ |
| + size_t prefix_len = 0; |
| + for (i = 0; i < ex_count; i++) { |
| + prefix_len_arr[i].prec = prefix_len; |
| + prefix_len += ex_arr[i].len; |
| + prefix_len_arr[i].total = prefix_len; |
| + } |
| + |
| + /* Configure control object, including physical/logical file position. */ |
| + xf->fd = fd; |
| + xf->ex_count = ex_count; |
| + xf->ex_arr = ex_arr; |
| + xf->prefix_len_arr = prefix_len_arr; |
| + xf->ex_free = ex_free; |
| + xf->total_ex_len = prefix_len_arr[ex_count - 1].total; |
| + xf->curr_file_pos = lseek(fd, 0, SEEK_CUR); |
| + xf->curr_ex_idx = 0; |
| + xf->curr_ex_pos = 0; |
| + xf->curr_pos = 0; |
| + |
| + /* Return the external stream handle. */ |
| + return handle; |
| +} |
| + |
| +FILE * |
| +exfile_fopen(const char *path, const char *fopen_mode, ex_t *ex_arr, |
| + size_t ex_count, void (*ex_free)(void *)) |
| +{ |
| + return exfile_open(-1, path, fopen_mode, ex_arr, ex_count, ex_free); |
| +} |
| + |
| +FILE * |
| +exfile_fdopen(int fd, const char *fopen_mode, ex_t *ex_arr, |
| + size_t ex_count, void (*ex_free)(void *)) |
| +{ |
| + return exfile_open(fd, NULL, fopen_mode, ex_arr, ex_count, ex_free); |
| +} |
| new file mode 100644 |
| --- /dev/null |
| +++ exfile.h |
| @@ -0,0 +1,49 @@ |
| +#ifndef __EXFILE_H |
| +#define __EXFILE_H |
| + |
| +#include <stdio.h> |
| + |
| +/* |
| + * Extent files. |
| + * |
| + * This modules provides a familiar interface for handling files through an |
| + * indirection layer of extents, which are contiguous chunks of variable length |
| + * at arbitrary offsets within a file. Once an extent file handle is obtained, |
| + * users may read, write and seek as they do with ordinary files, having the I/O |
| + * with the underlying file done for them by the extent file implementation. The |
| + * implementation supports "sparse extents", which are assumed to contain zeros |
| + * but otherwise have no actual representation in the underlying file; these are |
| + * denoted by negative offset values. |
| + * |
| + * Unlike ordinary files, the size of an extent file is fixed; it is not |
| + * truncated on open, nor is writing past the extent span allowed. Also, writing |
| + * to a sparse extent has no effect and will not raise an error. |
| + */ |
| + |
| + |
| +/* An extent, defined by an offset and a length. */ |
| +typedef struct { |
| + off_t off; /* the extent offset; negative indicates a sparse extent */ |
| + size_t len; /* the extent length */ |
| +} ex_t; |
| + |
| + |
| +/* Opens a file |path| with use mode |fopen_mode| for use with an array of |
| + * extents |ex_arr| of length |ex_count|. The mode string can be either of "r" |
| + * (read-only), "w" (write-only) or "r+"/"w+" (read-write); the underlying file |
| + * is neither created (if not present) nor truncated (if present) when opened |
| + * for writing. The function |ex_free|, if not NULL, will be called to |
| + * deallocate the extent array once the file object is closed. Returns a FILE |
| + * pointer that can be used with ordinary stream functions (e.g. fread), or |
| + * NULL if opening the file has failed. */ |
| +FILE *exfile_fopen(const char *path, const char *fopen_mode, ex_t *ex_arr, |
| + size_t ex_count, void (*ex_free)(void *)); |
| + |
| +/* Associates an extent file stream with an already open file descriptor |fd|. |
| + * The |fopen_mode| argument is as decribed above and must be compatible with |
| + * the mode of |fd|. All other arguments, behaviors and return values are as |
| + * those of exfile_fopen (see above). */ |
| +FILE *exfile_fdopen(int fd, const char *fopen_mode, ex_t *ex_arr, |
| + size_t ex_count, void (*ex_free)(void *)); |
| + |
| +#endif /* __EXFILE_H */ |
| new file mode 100644 |
| --- /dev/null |
| +++ extents.c |
| @@ -0,0 +1,126 @@ |
| +#include <assert.h> |
| +#include <errno.h> |
| +#include <limits.h> |
| +#include <stdint.h> |
| +#include <stdlib.h> |
| + |
| +#include "extents.h" |
| + |
| + |
| +#define TRUE 1 |
| +#define FALSE 0 |
| + |
| +/* Minimum/maximum values for arbitrary integer types. */ |
| +#define UNSIGNED_INT_MAX(t) (~((t)0)) |
| +#define SIGNED_INT_MAX(t) ((t)((uintmax_t)UNSIGNED_INT_MAX(t) >> 1)) |
| +#define MAX(a,b) ((a) > (b) ? (a) : (b)) |
| +#define INT_TYPE_MAX(t) MAX(UNSIGNED_INT_MAX(t), SIGNED_INT_MAX(t)) |
| + |
| +/* The maximum accepted value for a given integer type when parsed as a signed |
| + * long long integer. This is defined to be the smaller of the maximum value |
| + * that can be represented by this type and LLONG_MAX. This bound allows us to |
| + * properly check that parsed values do not exceed the capacity of their |
| + * intended store, regardless of how its size relates to that of a signed long |
| + * long integer. Note: this may mean that we are losing the most significant |
| + * bit of an unsigned 64-bit field (e.g. size_t on some platforms), however |
| + * still permitting values up to 2^62, which is more than enough for all |
| + * practical purposes. */ |
| +#define LLONG_MAX_BOUND(s) \ |
| + ((uintmax_t)(s) < (uintmax_t)LLONG_MAX ? (long long)(s) : LLONG_MAX) |
| +#define MAX_ALLOWED(t) LLONG_MAX_BOUND(INT_TYPE_MAX(t)) |
| + |
| +/* Get the type of a struct field. */ |
| +#define FIELD_TYPE(t, f) typeof(((t *)0)->f) |
| + |
| + |
| +/* Reads a long long integer from |s| into |*val_p|. Returns a pointer to the |
| + * character immediately following the specified |delim|, unless (a) parsing |
| + * failed (overflow or no valid digits); (b) the read value is less than |
| + * |min_val| or greater than |max_val|; (c) the delimiter character is not |
| + * |delim|, or the string ends although |may_end| is false. In any of these |
| + * cases, returns NULL. */ |
| +const char * |
| +read_llong(const char *s, long long *val_p, long long min_val, |
| + long long max_val, char delim, int may_end) |
| +{ |
| + assert(val_p); |
| + const char *next_s; |
| + errno = 0; |
| + long long val = strtoll(s, (char **)&next_s, 10); |
| + if (((val == LLONG_MAX || val == LLONG_MIN) && errno == ERANGE) || |
| + next_s == s || val < min_val || val > max_val || |
| + (*next_s ? *next_s != delim : !may_end)) |
| + return NULL; /* bad value or delimiter */ |
| + *val_p = val; |
| + if (*next_s) |
| + next_s++; /* skip delimeter */ |
| + return next_s; |
| +} |
| + |
| + |
| +/* Reads a comma-separated list of "offset:length" extents from |ex_str|. If |
| + * |ex_arr| is NULL, then |ex_count| is ignored and it attempts to parse valid |
| + * extents until the end of the string is reached. Otherwise, stores up to |
| + * |ex_count| extents into |ex_arr|, which must be of at least this size. |
| + * Returns the number of correctly parsed extents, or -1 if a malformed extent |
| + * was found. */ |
| +static ssize_t |
| +extents_read(const char *ex_str, ex_t *ex_arr, size_t ex_count) |
| +{ |
| + size_t i; |
| + size_t last_i = ex_count - 1; |
| + if (!ex_arr) { |
| + ex_count = SIZE_MAX; |
| + last_i = 0; |
| + } |
| + for (i = 0; *ex_str && i < ex_count; i++) { |
| + long long raw_off = 0, raw_len = 0; |
| + if (!((ex_str = read_llong(ex_str, &raw_off, -1, |
| + MAX_ALLOWED(FIELD_TYPE(ex_t, off)), |
| + ':', FALSE)) && |
| + (ex_str = read_llong(ex_str, &raw_len, 1, |
| + MAX_ALLOWED(FIELD_TYPE(ex_t, len)), |
| + ',', i >= last_i)))) |
| + return -1; /* parsing error */ |
| + if (ex_arr) { |
| + ex_arr[i].off = raw_off; |
| + ex_arr[i].len = raw_len; |
| + } |
| + } |
| + return i; |
| +} |
| + |
| + |
| +ex_t * |
| +extents_parse(const char *ex_str, ex_t *ex_arr, size_t *ex_count_p) |
| +{ |
| + /* Sanity checks: a string must be provided; if an array is provided, an |
| + * array count must be given as well. */ |
| + if (!ex_str || (ex_arr && !ex_count_p)) |
| + return NULL; |
| + |
| + /* Parse string and count extents. */ |
| + ssize_t ret = extents_read(ex_str, NULL, 0); |
| + if (ret < 0) |
| + return NULL; /* parsing error */ |
| + size_t ex_count = (size_t)ret; |
| + |
| + /* Input is good, commit to extent count. */ |
| + if (ex_count_p) { |
| + size_t alloc_ex_count = *ex_count_p; |
| + *ex_count_p = ex_count; |
| + if (ex_arr && alloc_ex_count < ex_count) |
| + return NULL; /* insufficient allocated space */ |
| + } |
| + if (ex_count == 0) |
| + return NULL; /* no extents, nothing to do */ |
| + |
| + /* Allocate extent array, if needed. */ |
| + if (!(ex_arr || (ex_arr = (ex_t *)malloc(sizeof(ex_t) * ex_count)))) |
| + return NULL; /* allocation failed */ |
| + |
| + /* Populate the extent array. */ |
| + extents_read(ex_str, ex_arr, ex_count); |
| + |
| + return ex_arr; |
| +} |
| new file mode 100644 |
| --- /dev/null |
| +++ extents.h |
| @@ -0,0 +1,22 @@ |
| +#ifndef __EXTENTS_H |
| +#define __EXTENTS_H |
| + |
| +#include "exfile.h" |
| + |
| + |
| +/* Parses a string representation |ex_str| and populates an array |ex_arr| |
| + * consisting of |*ex_count_p| extents. The string is expected to be a |
| + * comma-separated list of pairs of the form "offset:length". An offset may be |
| + * -1 or a non-negative integer; the former indicates a sparse extent |
| + * (consisting of zeros). A length is a positive integer. If |ex_arr| is NULL, |
| + * |*ex_count_p| is ignored and a new array is allocated based on the actual |
| + * number of extents parsed. Upon success, returns a pointer to the populated |
| + * array of extents and stores the actual number of extents at the location |
| + * pointed to be |ex_count_p| (if provided). If the string parses correctly but |
| + * the operation otherwise fails (allocation error, array too small), returns |
| + * NULL but still store the number of parsed extents. Otherwise, returns NULL |
| + * and does not store anything. If a new array was allocated, then it should be |
| + * deallocated with free(3). */ |
| +ex_t *extents_parse(const char *ex_str, ex_t *ex_arr, size_t *ex_count_p); |
| + |
| +#endif /* __EXTENTS_H */ |