blob: feaf9f44d2a3229088b39016d78ab2cef229c109 [file] [log] [blame]
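Make bsdiff use libdivsufsort for suffix sorting; also clean up the man page markup, use <= in the search() comparison, and break out of the scan loop when it gets stuck on near-identical data.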
--- bsdiff.1
+++ bsdiff.1
@@ -33,20 +33,21 @@
.Nd generate a patch between two binary files
.Sh SYNOPSIS
.Nm
-.Ao Ar oldfile Ac Ao Ar newfile Ac Ao Ar patchfile Ac
+.Ar oldfile newfile patchfile
.Sh DESCRIPTION
.Nm
compares
-.Ao Ar oldfile Ac
+.Ar oldfile
to
-.Ao Ar newfile Ac
+.Ar newfile
and writes to
-.Ao Ar patchfile Ac
-a binary patch suitable for use by bspatch(1).
+.Ar patchfile
+a binary patch suitable for use by
+.Xr bspatch 1 .
When
-.Ao Ar oldfile Ac
+.Ar oldfile
and
-.Ao Ar newfile Ac
+.Ar newfile
are two versions of an executable program, the
patches produced are on average a factor of five smaller
than those produced by any other binary patch tool known
@@ -54,7 +55,7 @@ to the author.
.Pp
.Nm
uses memory equal to 17 times the size of
-.Ao Ar oldfile Ac ,
+.Ar oldfile ,
and requires
an absolute minimum working set size of 8 times the size of oldfile.
.Sh SEE ALSO
--- bsdiff.c
+++ bsdiff.c
@@ -38,106 +38,15 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05
#include <string.h>
#include <unistd.h>
-#define MIN(x,y) (((x)<(y)) ? (x) : (y))
-
-static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h)
-{
- off_t i,j,k,x,tmp,jj,kk;
-
- if(len<16) {
- for(k=start;k<start+len;k+=j) {
- j=1;x=V[I[k]+h];
- for(i=1;k+i<start+len;i++) {
- if(V[I[k+i]+h]<x) {
- x=V[I[k+i]+h];
- j=0;
- };
- if(V[I[k+i]+h]==x) {
- tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
- j++;
- };
- };
- for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
- if(j==1) I[k]=-1;
- };
- return;
- };
-
- x=V[I[start+len/2]+h];
- jj=0;kk=0;
- for(i=start;i<start+len;i++) {
- if(V[I[i]+h]<x) jj++;
- if(V[I[i]+h]==x) kk++;
- };
- jj+=start;kk+=jj;
-
- i=start;j=0;k=0;
- while(i<jj) {
- if(V[I[i]+h]<x) {
- i++;
- } else if(V[I[i]+h]==x) {
- tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
- j++;
- } else {
- tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
- k++;
- };
- };
-
- while(jj+j<kk) {
- if(V[I[jj+j]+h]==x) {
- j++;
- } else {
- tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
- k++;
- };
- };
-
- if(jj>start) split(I,V,start,jj-start,h);
-
- for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
- if(jj==kk-1) I[jj]=-1;
-
- if(start+len>kk) split(I,V,kk,start+len-kk,h);
-}
-
-static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize)
-{
- off_t buckets[256];
- off_t i,h,len;
-
- for(i=0;i<256;i++) buckets[i]=0;
- for(i=0;i<oldsize;i++) buckets[old[i]]++;
- for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
- for(i=255;i>0;i--) buckets[i]=buckets[i-1];
- buckets[0]=0;
-
- for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
- I[0]=oldsize;
- for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
- V[oldsize]=0;
- for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
- I[0]=-1;
-
- for(h=1;I[0]!=-(oldsize+1);h+=h) {
- len=0;
- for(i=0;i<oldsize+1;) {
- if(I[i]<0) {
- len-=I[i];
- i-=I[i];
- } else {
- if(len) I[i-len]=-len;
- len=V[I[i]]+1-i;
- split(I,V,i,len,h);
- i+=len;
- len=0;
- };
- };
- if(len) I[i-len]=-len;
- };
+#if _FILE_OFFSET_BITS == 64
+#include "divsufsort64.h"
+#define saidx_t saidx64_t
+#define divsufsort divsufsort64
+#else
+#include "divsufsort.h"
+#endif
- for(i=0;i<oldsize+1;i++) I[V[i]]=i;
-}
+#define MIN(x,y) (((x)<(y)) ? (x) : (y))
static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
{
@@ -149,7 +58,7 @@ static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
return i;
}
-static off_t search(off_t *I,u_char *old,off_t oldsize,
+static off_t search(saidx_t *I,u_char *old,off_t oldsize,
u_char *new,off_t newsize,off_t st,off_t en,off_t *pos)
{
off_t x,y;
@@ -168,7 +77,7 @@ static off_t search(off_t *I,u_char *old,off_t oldsize,
};
x=st+(en-st)/2;
- if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) {
+ if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<=0) {
return search(I,old,oldsize,new,newsize,x,en,pos);
} else {
return search(I,old,oldsize,new,newsize,st,x,pos);
@@ -198,8 +107,8 @@ int main(int argc,char *argv[])
int fd;
u_char *old,*new;
off_t oldsize,newsize;
- off_t *I,*V;
- off_t scan,pos,len;
+ saidx_t *I;
+ off_t scan,pos=0,len;
off_t lastscan,lastpos,lastoffset;
off_t oldscore,scsc;
off_t s,Sf,lenf,Sb,lenb;
@@ -224,12 +133,9 @@ int main(int argc,char *argv[])
(read(fd,old,oldsize)!=oldsize) ||
(close(fd)==-1)) err(1,"%s",argv[1]);
- if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) ||
- ((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL);
-
- qsufsort(I,V,old,oldsize);
+ if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL);
- free(V);
+ if(divsufsort(old, I, oldsize)) err(1, "divsufsort");
/* Allocate newsize+1 bytes instead of newsize bytes to ensure
that we never try to malloc(0) and get a NULL pointer */
@@ -274,7 +180,17 @@ int main(int argc,char *argv[])
while(scan<newsize) {
oldscore=0;
+ /* If we come across a large block of data that differs by
+ * fewer than 8 bytes, this loop takes a long time to get past
+ * it. Count the consecutive iterations in which we are stuck
+ * in such a block and break out once the count exceeds 100. */
+ int num_less_than_eight = 0;
+ off_t prev_len, prev_oldscore, prev_pos;
for(scsc=scan+=len;scan<newsize;scan++) {
+ prev_len=len;
+ prev_oldscore=oldscore;
+ prev_pos=pos;
+
len=search(I,old,oldsize,new+scan,newsize-scan,
0,oldsize,&pos);
@@ -289,6 +205,17 @@ int main(int argc,char *argv[])
if((scan+lastoffset<oldsize) &&
(old[scan+lastoffset] == new[scan]))
oldscore--;
+
+ const off_t fuzz = 8;
+ if (prev_len-fuzz<=len && len<=prev_len &&
+ prev_oldscore-fuzz<=oldscore &&
+ oldscore<=prev_oldscore &&
+ prev_pos<=pos && pos <=prev_pos+fuzz &&
+ oldscore<=len && len<=oldscore+fuzz)
+ ++num_less_than_eight;
+ else
+ num_less_than_eight=0;
+ if (num_less_than_eight > 100) break;
};
if((len!=oldscore) || (scan==newsize)) {