Date: Mon, 25 Jul 2016 03:58:19 +0000 (UTC) From: Xin LI <delphij@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r303285 - in head: contrib/libdivsufsort contrib/libdivsufsort/CMakeModules contrib/libdivsufsort/examples contrib/libdivsufsort/include contrib/libdivsufsort/lib contrib/libdivsufsort/... Message-ID: <201607250358.u6P3wJtw012266@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: delphij Date: Mon Jul 25 03:58:19 2016 New Revision: 303285 URL: https://svnweb.freebsd.org/changeset/base/303285 Log: Change bsdiff to use divsufsort suffix sort library instead of qsufsort, which is more efficient. Note that for now we do not create a separate library for libdivsufsort because it's not used anywhere else. Obtained from: Chromium MFC after: 2 months Added: head/contrib/libdivsufsort/ - copied from r303279, vendor/libdivsufsort/dist/ head/usr.bin/bsdiff/bsdiff/config.h (contents, props changed) head/usr.bin/bsdiff/bsdiff/divsufsort64.h (contents, props changed) Deleted: head/contrib/libdivsufsort/.gitignore head/contrib/libdivsufsort/CMakeLists.txt head/contrib/libdivsufsort/CMakeModules/ head/contrib/libdivsufsort/VERSION.cmake head/contrib/libdivsufsort/examples/ head/contrib/libdivsufsort/include/CMakeLists.txt head/contrib/libdivsufsort/include/lfs.h.cmake head/contrib/libdivsufsort/lib/CMakeLists.txt head/contrib/libdivsufsort/pkgconfig/ Modified: head/usr.bin/bsdiff/bsdiff/Makefile head/usr.bin/bsdiff/bsdiff/bsdiff.c Modified: head/usr.bin/bsdiff/bsdiff/Makefile ============================================================================== --- head/usr.bin/bsdiff/bsdiff/Makefile Mon Jul 25 03:30:26 2016 (r303284) +++ head/usr.bin/bsdiff/bsdiff/Makefile Mon Jul 25 03:58:19 2016 (r303285) @@ -1,6 +1,17 @@ # $FreeBSD$ -PROG= bsdiff +PROG= bsdiff + +# libdivsufsort configured with: +# cmake -DCMAKE_BUILD_TYPE="Release" -DBUILD_DIVSUFSORT64=ON +.PATH: ${.CURDIR}/../../../contrib/libdivsufsort/lib +CFLAGS+= -DHAVE_CONFIG_H=1 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE +CFLAGS+= -D_LARGE_FILES -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS +CFLAGS+= -D__STDC_LIMIT_MACROS -DBUILD_DIVSUFSORT64 +CFLAGS+= -I${.CURDIR}/../../../contrib/libdivsufsort/include -I${.CURDIR} +SRCS= divsufsort.c sssort.c trsort.c utils.c + +SRCS+= bsdiff.c LIBADD= bz2 Modified: head/usr.bin/bsdiff/bsdiff/bsdiff.c ============================================================================== --- head/usr.bin/bsdiff/bsdiff/bsdiff.c Mon Jul 25 03:30:26 2016 (r303284) +++ head/usr.bin/bsdiff/bsdiff/bsdiff.c Mon Jul 25 03:58:19 2016 (r303285) @@ -44,106 +44,11 @@ __FBSDID("$FreeBSD$"); #define O_BINARY 0 #endif -#define MIN(x,y) (((x)<(y)) ? (x) : (y)) - -static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h) -{ - off_t i,j,k,x,tmp,jj,kk; - - if(len<16) { - for(k=start;k<start+len;k+=j) { - j=1;x=V[I[k]+h]; - for(i=1;k+i<start+len;i++) { - if(V[I[k+i]+h]<x) { - x=V[I[k+i]+h]; - j=0; - } - if(V[I[k+i]+h]==x) { - tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; - j++; - } - } - for(i=0;i<j;i++) V[I[k+i]]=k+j-1; - if(j==1) I[k]=-1; - } - return; - } - - x=V[I[start+len/2]+h]; - jj=0;kk=0; - for(i=start;i<start+len;i++) { - if(V[I[i]+h]<x) jj++; - if(V[I[i]+h]==x) kk++; - } - jj+=start;kk+=jj; - - i=start;j=0;k=0; - while(i<jj) { - if(V[I[i]+h]<x) { - i++; - } else if(V[I[i]+h]==x) { - tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; - j++; - } else { - tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; - k++; - } - } - - while(jj+j<kk) { - if(V[I[jj+j]+h]==x) { - j++; - } else { - tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; - k++; - } - } - - if(jj>start) split(I,V,start,jj-start,h); - - for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; - if(jj==kk-1) I[jj]=-1; - - if(start+len>kk) split(I,V,kk,start+len-kk,h); -} - -static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize) -{ - off_t buckets[256]; - off_t i,h,len; - - for(i=0;i<256;i++) buckets[i]=0; - for(i=0;i<oldsize;i++) buckets[old[i]]++; - for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; - for(i=255;i>0;i--) buckets[i]=buckets[i-1]; - buckets[0]=0; - - for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; - I[0]=oldsize; - for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; - V[oldsize]=0; - for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; - I[0]=-1; - - for(h=1;I[0]!=-(oldsize+1);h+=h) { - len=0; - for(i=0;i<oldsize+1;) { - if(I[i]<0) { - len-=I[i]; - i-=I[i]; - } else { - if(len) I[i-len]=-len; - len=V[I[i]]+1-i; - split(I,V,i,len,h); - i+=len; - len=0; - } - } - if(len) I[i-len]=-len; - } +#include "divsufsort64.h" +#define saidx_t saidx64_t +#define divsufsort divsufsort64 - for(i=0;i<oldsize+1;i++) I[V[i]]=i; -} +#define MIN(x,y) (((x)<(y)) ? (x) : (y)) static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) { @@ -212,7 +117,7 @@ int main(int argc,char *argv[]) int fd; u_char *old,*new; off_t oldsize,newsize; - off_t *I,*V; + saidx_t *I; off_t scan,pos,len; off_t lastscan,lastpos,lastoffset; off_t oldscore,scsc; @@ -248,12 +153,9 @@ int main(int argc,char *argv[]) (read(fd,old,oldsize)!=oldsize) || (close(fd)==-1)) err(1,"%s",argv[1]); - if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) || - ((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL); - - qsufsort(I,V,old,oldsize); + if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL); - free(V); + if(divsufsort(old, I, oldsize)) err(1, "divsufsort"); /* Allocate newsize+1 bytes instead of newsize bytes to ensure that we never try to malloc(0) and get a NULL pointer */ Added: head/usr.bin/bsdiff/bsdiff/config.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/usr.bin/bsdiff/bsdiff/config.h Mon Jul 25 03:58:19 2016 (r303285) @@ -0,0 +1,83 @@ +/* + * config.h for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef _CONFIG_H +#define _CONFIG_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** Define to the version of this package. **/ +#define PROJECT_VERSION_FULL "2.0.1-14-g5f60d6f" + +/** Define to 1 if you have the header files. **/ +#define HAVE_INTTYPES_H 1 +#define HAVE_STDDEF_H 1 +#define HAVE_STDINT_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRING_H 1 +#define HAVE_STRINGS_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_SYS_TYPES_H 1 + +/** for WinIO **/ +/* #undef HAVE_IO_H */ +/* #undef HAVE_FCNTL_H */ +/* #undef HAVE__SETMODE */ +/* #undef HAVE_SETMODE */ +/* #undef HAVE__FILENO */ +/* #undef HAVE_FOPEN_S */ +/* #undef HAVE__O_BINARY */ +#ifndef HAVE__SETMODE +# if HAVE_SETMODE +# define _setmode setmode +# define HAVE__SETMODE 1 +# endif +# if HAVE__SETMODE && !HAVE__O_BINARY +# define _O_BINARY 0 +# define HAVE__O_BINARY 1 +# endif +#endif + +/** for inline **/ +#ifndef INLINE +# define INLINE inline +#endif + +/** for VC++ warning **/ +#ifdef _MSC_VER +#pragma warning(disable: 4127) +#endif + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _CONFIG_H */ Added: head/usr.bin/bsdiff/bsdiff/divsufsort64.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/usr.bin/bsdiff/bsdiff/divsufsort64.h Mon Jul 25 03:58:19 2016 (r303285) @@ -0,0 +1,180 @@ +/* + * divsufsort64.h for libdivsufsort64 + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT64_H +#define _DIVSUFSORT64_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include <inttypes.h> + +#ifndef DIVSUFSORT_API +# ifdef DIVSUFSORT_BUILD_DLL +# define DIVSUFSORT_API +# else +# define DIVSUFSORT_API +# endif +#endif + +/*- Datatypes -*/ +#ifndef SAUCHAR_T +#define SAUCHAR_T +typedef uint8_t sauchar_t; +#endif /* SAUCHAR_T */ +#ifndef SAINT_T +#define SAINT_T +typedef int32_t saint_t; +#endif /* SAINT_T */ +#ifndef SAIDX64_T +#define SAIDX64_T +typedef int64_t saidx64_t; +#endif /* SAIDX64_T */ +#ifndef PRIdSAINT_T +#define PRIdSAINT_T PRId32 +#endif /* PRIdSAINT_T */ +#ifndef PRIdSAIDX64_T +#define PRIdSAIDX64_T PRId64 +#endif /* PRIdSAIDX64_T */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T[0..n-1] The input string. + * @param SA[0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saint_t +divsufsort64(const sauchar_t *T, saidx64_t *SA, saidx64_t n); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T[0..n-1] The input string. + * @param U[0..n-1] The output string. (can be T) + * @param A[0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saidx64_t +divbwt64(const sauchar_t *T, sauchar_t *U, saidx64_t *A, saidx64_t n); + +/** + * Returns the version of the divsufsort library. + * @return The version number string. + */ +DIVSUFSORT_API +const char * +divsufsort64_version(void); + + +/** + * Constructs the burrows-wheeler transformed string of a given string and suffix array. + * @param T[0..n-1] The input string. + * @param U[0..n-1] The output string. (can be T) + * @param SA[0..n-1] The suffix array. (can be NULL) + * @param n The length of the given string. + * @param idx The output primary index. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saint_t +bw_transform64(const sauchar_t *T, sauchar_t *U, + saidx64_t *SA /* can NULL */, + saidx64_t n, saidx64_t *idx); + +/** + * Inverse BW-transforms a given BWTed string. + * @param T[0..n-1] The input string. + * @param U[0..n-1] The output string. (can be T) + * @param A[0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param idx The primary index. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saint_t +inverse_bw_transform64(const sauchar_t *T, sauchar_t *U, + saidx64_t *A /* can NULL */, + saidx64_t n, saidx64_t idx); + +/** + * Checks the correctness of a given suffix array. + * @param T[0..n-1] The input string. + * @param SA[0..n-1] The input suffix array. + * @param n The length of the given string. + * @param verbose The verbose mode. + * @return 0 if no error occurred. + */ +DIVSUFSORT_API +saint_t +sufcheck64(const sauchar_t *T, const saidx64_t *SA, saidx64_t n, saint_t verbose); + +/** + * Search for the pattern P in the string T. + * @param T[0..Tsize-1] The input string. + * @param Tsize The length of the given string. + * @param P[0..Psize-1] The input pattern string. + * @param Psize The length of the given pattern string. + * @param SA[0..SAsize-1] The input suffix array. + * @param SAsize The length of the given suffix array. + * @param idx The output index. + * @return The count of matches if no error occurred, -1 otherwise. + */ +DIVSUFSORT_API +saidx64_t +sa_search64(const sauchar_t *T, saidx64_t Tsize, + const sauchar_t *P, saidx64_t Psize, + const saidx64_t *SA, saidx64_t SAsize, + saidx64_t *left); + +/** + * Search for the character c in the string T. + * @param T[0..Tsize-1] The input string. + * @param Tsize The length of the given string. + * @param SA[0..SAsize-1] The input suffix array. + * @param SAsize The length of the given suffix array. + * @param c The input character. + * @param idx The output index. + * @return The count of matches if no error occurred, -1 otherwise. + */ +DIVSUFSORT_API +saidx64_t +sa_simplesearch64(const sauchar_t *T, saidx64_t Tsize, + const saidx64_t *SA, saidx64_t SAsize, + saint_t c, saidx64_t *left); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT64_H */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201607250358.u6P3wJtw012266>