| Make bsdiff use libdivsufsort + tiny / cosmetic fixes. |
| --- bsdiff.1 |
| +++ bsdiff.1 |
| @@ -33,20 +33,21 @@ |
| .Nd generate a patch between two binary files |
| .Sh SYNOPSIS |
| .Nm |
| -.Ao Ar oldfile Ac Ao Ar newfile Ac Ao Ar patchfile Ac |
| +.Ar oldfile newfile patchfile |
| .Sh DESCRIPTION |
| .Nm |
| compares |
| -.Ao Ar oldfile Ac |
| +.Ar oldfile |
| to |
| -.Ao Ar newfile Ac |
| +.Ar newfile |
| and writes to |
| -.Ao Ar patchfile Ac |
| -a binary patch suitable for use by bspatch(1). |
| +.Ar patchfile |
| +a binary patch suitable for use by |
| +.Xr bspatch 1 . |
| When |
| -.Ao Ar oldfile Ac |
| +.Ar oldfile |
| and |
| -.Ao Ar newfile Ac |
| +.Ar newfile |
| are two versions of an executable program, the |
| patches produced are on average a factor of five smaller |
| than those produced by any other binary patch tool known |
| @@ -54,7 +55,7 @@ to the author. |
| .Pp |
| .Nm |
| uses memory equal to 17 times the size of |
| -.Ao Ar oldfile Ac , |
| +.Ar oldfile , |
| and requires |
| an absolute minimum working set size of 8 times the size of oldfile. |
| .Sh SEE ALSO |
| --- bsdiff.c |
| +++ bsdiff.c |
| @@ -38,106 +38,15 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 |
| #include <string.h> |
| #include <unistd.h> |
| |
| -#define MIN(x,y) (((x)<(y)) ? (x) : (y)) |
| - |
| -static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h) |
| -{ |
| - off_t i,j,k,x,tmp,jj,kk; |
| - |
| - if(len<16) { |
| - for(k=start;k<start+len;k+=j) { |
| - j=1;x=V[I[k]+h]; |
| - for(i=1;k+i<start+len;i++) { |
| - if(V[I[k+i]+h]<x) { |
| - x=V[I[k+i]+h]; |
| - j=0; |
| - }; |
| - if(V[I[k+i]+h]==x) { |
| - tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; |
| - j++; |
| - }; |
| - }; |
| - for(i=0;i<j;i++) V[I[k+i]]=k+j-1; |
| - if(j==1) I[k]=-1; |
| - }; |
| - return; |
| - }; |
| - |
| - x=V[I[start+len/2]+h]; |
| - jj=0;kk=0; |
| - for(i=start;i<start+len;i++) { |
| - if(V[I[i]+h]<x) jj++; |
| - if(V[I[i]+h]==x) kk++; |
| - }; |
| - jj+=start;kk+=jj; |
| - |
| - i=start;j=0;k=0; |
| - while(i<jj) { |
| - if(V[I[i]+h]<x) { |
| - i++; |
| - } else if(V[I[i]+h]==x) { |
| - tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; |
| - j++; |
| - } else { |
| - tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; |
| - k++; |
| - }; |
| - }; |
| - |
| - while(jj+j<kk) { |
| - if(V[I[jj+j]+h]==x) { |
| - j++; |
| - } else { |
| - tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; |
| - k++; |
| - }; |
| - }; |
| - |
| - if(jj>start) split(I,V,start,jj-start,h); |
| - |
| - for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; |
| - if(jj==kk-1) I[jj]=-1; |
| - |
| - if(start+len>kk) split(I,V,kk,start+len-kk,h); |
| -} |
| - |
| -static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize) |
| -{ |
| - off_t buckets[256]; |
| - off_t i,h,len; |
| - |
| - for(i=0;i<256;i++) buckets[i]=0; |
| - for(i=0;i<oldsize;i++) buckets[old[i]]++; |
| - for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; |
| - for(i=255;i>0;i--) buckets[i]=buckets[i-1]; |
| - buckets[0]=0; |
| - |
| - for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; |
| - I[0]=oldsize; |
| - for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; |
| - V[oldsize]=0; |
| - for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; |
| - I[0]=-1; |
| - |
| - for(h=1;I[0]!=-(oldsize+1);h+=h) { |
| - len=0; |
| - for(i=0;i<oldsize+1;) { |
| - if(I[i]<0) { |
| - len-=I[i]; |
| - i-=I[i]; |
| - } else { |
| - if(len) I[i-len]=-len; |
| - len=V[I[i]]+1-i; |
| - split(I,V,i,len,h); |
| - i+=len; |
| - len=0; |
| - }; |
| - }; |
| - if(len) I[i-len]=-len; |
| - }; |
| +#if _FILE_OFFSET_BITS == 64 |
| +#include "divsufsort64.h" |
| +#define saidx_t saidx64_t |
| +#define divsufsort divsufsort64 |
| +#else |
| +#include "divsufsort.h" |
| +#endif |
| |
| - for(i=0;i<oldsize+1;i++) I[V[i]]=i; |
| -} |
| +#define MIN(x,y) (((x)<(y)) ? (x) : (y)) |
| |
| static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) |
| { |
| @@ -149,7 +58,7 @@ static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) |
| return i; |
| } |
| |
| -static off_t search(off_t *I,u_char *old,off_t oldsize, |
| +static off_t search(saidx_t *I,u_char *old,off_t oldsize, |
| u_char *new,off_t newsize,off_t st,off_t en,off_t *pos) |
| { |
| off_t x,y; |
| @@ -168,7 +77,7 @@ static off_t search(off_t *I,u_char *old,off_t oldsize, |
| }; |
| |
| x=st+(en-st)/2; |
| - if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) { |
| + if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<=0) { |
| return search(I,old,oldsize,new,newsize,x,en,pos); |
| } else { |
| return search(I,old,oldsize,new,newsize,st,x,pos); |
| @@ -198,8 +107,8 @@ int main(int argc,char *argv[]) |
| int fd; |
| u_char *old,*new; |
| off_t oldsize,newsize; |
| - off_t *I,*V; |
| - off_t scan,pos,len; |
| + saidx_t *I; |
| + off_t scan,pos=0,len; |
| off_t lastscan,lastpos,lastoffset; |
| off_t oldscore,scsc; |
| off_t s,Sf,lenf,Sb,lenb; |
| @@ -224,12 +133,9 @@ int main(int argc,char *argv[]) |
| (read(fd,old,oldsize)!=oldsize) || |
| (close(fd)==-1)) err(1,"%s",argv[1]); |
| |
| - if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) || |
| - ((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL); |
| - |
| - qsufsort(I,V,old,oldsize); |
| + if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL); |
| |
| - free(V); |
| + if(divsufsort(old, I, oldsize)) err(1, "divsufsort"); |
| |
| /* Allocate newsize+1 bytes instead of newsize bytes to ensure |
| that we never try to malloc(0) and get a NULL pointer */ |
| @@ -274,7 +180,17 @@ int main(int argc,char *argv[]) |
| while(scan<newsize) { |
| oldscore=0; |
| |
| + /* If we come across a large block of data that only differs |
| + * by less than 8 bytes, this loop will take a long time to |
| + * go past that block of data. We need to track the number of |
| + * times we're stuck in the block and break out of it. */ |
| + int num_less_than_eight = 0; |
| + off_t prev_len, prev_oldscore, prev_pos; |
| for(scsc=scan+=len;scan<newsize;scan++) { |
| + prev_len=len; |
| + prev_oldscore=oldscore; |
| + prev_pos=pos; |
| + |
| len=search(I,old,oldsize,new+scan,newsize-scan, |
| 0,oldsize,&pos); |
| |
| @@ -289,6 +205,17 @@ int main(int argc,char *argv[]) |
| if((scan+lastoffset<oldsize) && |
| (old[scan+lastoffset] == new[scan])) |
| oldscore--; |
| + |
| + const off_t fuzz = 8; |
| + if (prev_len-fuzz<=len && len<=prev_len && |
| + prev_oldscore-fuzz<=oldscore && |
| + oldscore<=prev_oldscore && |
| + prev_pos<=pos && pos <=prev_pos+fuzz && |
| + oldscore<=len && len<=oldscore+fuzz) |
| + ++num_less_than_eight; |
| + else |
| + num_less_than_eight=0; |
| + if (num_less_than_eight > 100) break; |
| }; |
| |
| if((len!=oldscore) || (scan==newsize)) { |