Date: Fri, 27 Mar 2026 06:22:20 +0000 From: Xin LI <delphij@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Subject: git: 74f357256769 - stable/15 - MFC: MFV: zlib 1.3.2. Message-ID: <69c6221c.43121.3633e3f0@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch stable/15 has been updated by delphij: URL: https://cgit.FreeBSD.org/src/commit/?id=74f357256769e12c3a843c75d1def483234312f3 commit 74f357256769e12c3a843c75d1def483234312f3 Author: Xin LI <delphij@FreeBSD.org> AuthorDate: 2026-03-13 23:49:53 +0000 Commit: Xin LI <delphij@FreeBSD.org> CommitDate: 2026-03-27 06:22:02 +0000 MFC: MFV: zlib 1.3.2. Relnotes: yes (cherry picked from commit 7aa1dba6b00ccfb7d66627badc8a7aaa06b02946) --- lib/libz/Symbol.map | 13 + lib/libz/Versions.def | 6 + sys/contrib/zlib/ChangeLog | 51 +++ sys/contrib/zlib/FAQ | 46 +- sys/contrib/zlib/LICENSE | 2 +- sys/contrib/zlib/README | 28 +- sys/contrib/zlib/compress.c | 44 +- sys/contrib/zlib/contrib/README.contrib | 57 --- sys/contrib/zlib/contrib/gcc_gvmat64/gvmat64.S | 574 ------------------------- sys/contrib/zlib/crc32.c | 164 +++---- sys/contrib/zlib/deflate.c | 176 +++++--- sys/contrib/zlib/deflate.h | 8 +- sys/contrib/zlib/doc/algorithm.txt | 2 +- sys/contrib/zlib/gzguts.h | 64 +-- sys/contrib/zlib/gzlib.c | 103 +++-- sys/contrib/zlib/gzread.c | 296 ++++++++----- sys/contrib/zlib/gzwrite.c | 267 +++++++----- sys/contrib/zlib/infback.c | 87 +--- sys/contrib/zlib/inffast.c | 13 +- sys/contrib/zlib/inffixed.h | 182 ++++---- sys/contrib/zlib/inflate.c | 189 ++------ sys/contrib/zlib/inflate.h | 2 +- sys/contrib/zlib/inftrees.c | 143 +++++- sys/contrib/zlib/inftrees.h | 4 +- sys/contrib/zlib/test/example.c | 14 +- sys/contrib/zlib/test/infcover.c | 10 +- sys/contrib/zlib/test/minigzip.c | 89 ++-- sys/contrib/zlib/trees.c | 28 +- sys/contrib/zlib/uncompr.c | 62 ++- sys/contrib/zlib/zconf.h | 54 ++- sys/contrib/zlib/zconf.h.in | 46 +- sys/contrib/zlib/zlib.3 | 22 +- sys/contrib/zlib/zlib.h | 309 +++++++++---- sys/contrib/zlib/zlib.map | 16 + sys/contrib/zlib/zlib.pc.in | 1 + sys/contrib/zlib/zutil.c | 84 ++-- sys/contrib/zlib/zutil.h | 99 ++++- 37 files changed, 1613 insertions(+), 1742 deletions(-) diff --git a/lib/libz/Symbol.map b/lib/libz/Symbol.map index 7bfe7cceda77..5df1d5253f91 
100644 --- a/lib/libz/Symbol.map +++ b/lib/libz/Symbol.map @@ -7,6 +7,19 @@ ZLIB_1.2.12 { crc32_combine_op; }; +ZLIB_1.3.1.2 { + deflateUsed; +}; + +ZLIB_1.3.2 { + compressBound_z; + compress_z; + compress2_z; + deflateBound_z; + uncompress_z; + uncompress2_z; +}; + ZLIB_1.2.9 { inflateCodesUsed; inflateValidate; diff --git a/lib/libz/Versions.def b/lib/libz/Versions.def index 2ee0106b5bfe..73b197240a4c 100644 --- a/lib/libz/Versions.def +++ b/lib/libz/Versions.def @@ -14,6 +14,12 @@ ZLIB_1.2.9 { ZLIB_1.2.12 { } ZLIB_1.2.9; +ZLIB_1.3.1.2 { +} ZLIB_1.2.12; + +ZLIB_1.3.2 { +} ZLIB_1.3.1.2; + FBSD_1.2 { } ZLIB_1.2.4.0; diff --git a/sys/contrib/zlib/ChangeLog b/sys/contrib/zlib/ChangeLog index b801a1031ec0..312753edadef 100644 --- a/sys/contrib/zlib/ChangeLog +++ b/sys/contrib/zlib/ChangeLog @@ -1,6 +1,57 @@ ChangeLog file for zlib +Changes in 1.3.2 (17 Feb 2026) +- Continued rewrite of CMake build [Vollstrecker] +- Various portability improvements +- Various github workflow additions and improvements +- Check for negative lengths in crc32_combine functions +- Copy only the initialized window contents in inflateCopy +- Prevent the use of insecure functions without an explicit request +- Add compressBound_z and deflateBound_z functions for large values +- Use atomics to build inflate fixed tables once +- Add definition of ZLIB_INSECURE to build tests with c89 and c94 +- Add --undefined option to ./configure for UBSan checker +- Copy only the initialized deflate state in deflateCopy +- Zero inflate state on allocation +- Remove untgz from contrib +- Add _z versions of the compress and uncompress functions +- Vectorize the CRC-32 calculation on the s390x +- Set bit 11 of the zip header flags in minizip if UTF-8 +- Update OS/400 support +- Add a test to configure to check for a working compiler +- Check for invalid NULL pointer inputs to zlib operations +- Add --mandir to ./configure to specify manual directory +- Add LICENSE.Info-Zip to contrib/minizip +- Remove vstudio 
projects in lieu of cmake-generated projects +- Replace strcpy() with memcpy() in contrib/minizip + +Changes in 1.3.1.2 (8 Dec 2025) +- Improve portability to RISC OS +- Permit compiling contrib/minizip/unzip.c with decryption +- Enable build of shared library on AIX +- Make deflateBound() more conservative and handle Z_STREAM_END +- Add zipAlreadyThere() to minizip zip.c to help avoid duplicates +- Make z_off_t 64 bits by default +- Add deflateUsed() function to get the used bits in the last byte +- Avoid out-of-bounds pointer arithmetic in inflateCopy() +- Add Haiku to configure for proper LDSHARED settings +- Add Bazel targets +- Complete rewrite of CMake build [Vollstrecker] +- Clarify the use of errnum in gzerror() +- Note that gzseek() requests are deferred until the next operation +- Note the use of gzungetc() to run a deferred seek while reading +- Fix bug in inflatePrime() for 16-bit ints +- Add a "G" option to force gzip, disabling transparency in gzread() +- Improve the discrimination between trailing garbage and bad gzip +- Allow gzflush() to write empty gzip members +- Remove redundant frees of point list on error in examples/zran.c +- Clarify the use of inflateGetHeader() +- Update links to the RFCs +- Return all available uncompressed data on error in gzread.c +- Support non-blocking devices in the gz* routines +- Various other small improvements + Changes in 1.3.1 (22 Jan 2024) - Reject overflows of zip header fields in minizip - Fix bug in inflateSync() for data held in bit buffer diff --git a/sys/contrib/zlib/FAQ b/sys/contrib/zlib/FAQ index 92f5d3e29fab..95c1a825acd4 100644 --- a/sys/contrib/zlib/FAQ +++ b/sys/contrib/zlib/FAQ @@ -3,8 +3,8 @@ If your question is not there, please check the zlib home page -http://zlib.net/ which may have more recent information. -The latest zlib FAQ is at http://zlib.net/zlib_faq.html +https://zlib.net/ which may have more recent information. +The latest zlib FAQ is at https://zlib.net/zlib_faq.html 1. 
Is zlib Y2K-compliant? @@ -19,7 +19,7 @@ The latest zlib FAQ is at http://zlib.net/zlib_faq.html 3. Where can I get a Visual Basic interface to zlib? See - * http://marknelson.us/1997/01/01/zlib-engine/ + * https://zlib.net/nelson/ * win32/DLL_FAQ.txt in the zlib distribution 4. compress() returns Z_BUF_ERROR. @@ -38,7 +38,7 @@ The latest zlib FAQ is at http://zlib.net/zlib_faq.html made with more input or output space. A Z_BUF_ERROR may in fact be unavoidable depending on how the functions are used, since it is not possible to tell whether or not there is more output pending when - strm.avail_out returns with zero. See http://zlib.net/zlib_how.html for a + strm.avail_out returns with zero. See https://zlib.net/zlib_how.html for a heavily annotated example. 6. Where's the zlib documentation (man pages, etc.)? @@ -109,8 +109,8 @@ The latest zlib FAQ is at http://zlib.net/zlib_faq.html 16. Can zlib decode Flate data in an Adobe PDF file? - Yes. See http://www.pdflib.com/ . To modify PDF forms, see - http://sourceforge.net/projects/acroformtool/ . + Yes. See https://www.pdflib.com/ . To modify PDF forms, see + https://sourceforge.net/projects/acroformtool/ . 17. Why am I getting this "register_frame_info not found" error on Solaris? @@ -156,6 +156,10 @@ The latest zlib FAQ is at http://zlib.net/zlib_faq.html library memory allocation routines by default. zlib's *Init* functions allow for the application to provide custom memory allocation routines. + If the non-default BUILDFIXED or DYNAMIC_CRC_TABLE defines are used on a + system without atomics (e.g. pre-C11), then inflate() and crc32() will not + be thread safe. + Of course, you should only operate on any given zlib or gzip stream from a single thread at a time. @@ -235,7 +239,7 @@ The latest zlib FAQ is at http://zlib.net/zlib_faq.html As far as we know, no. In fact, that was originally the whole point behind zlib. 
Look here for some more information: - http://www.gzip.org/#faq11 + https://web.archive.org/web/20180729212847/http://www.gzip.org/#faq11 32. Can zlib work with greater than 4 GB of data? @@ -258,20 +262,20 @@ The latest zlib FAQ is at http://zlib.net/zlib_faq.html 33. Does zlib have any security vulnerabilities? The only one that we are aware of is potentially in gzprintf(). If zlib is - compiled to use sprintf() or vsprintf(), then there is no protection - against a buffer overflow of an 8K string space (or other value as set by - gzbuffer()), other than the caller of gzprintf() assuring that the output - will not exceed 8K. On the other hand, if zlib is compiled to use - snprintf() or vsnprintf(), which should normally be the case, then there is - no vulnerability. The ./configure script will display warnings if an - insecure variation of sprintf() will be used by gzprintf(). Also the - zlibCompileFlags() function will return information on what variant of - sprintf() is used by gzprintf(). + compiled to use sprintf() or vsprintf(), which requires that ZLIB_INSECURE + be defined, then there is no protection against a buffer overflow of an 8K + string space (or other value as set by gzbuffer()), other than the caller + of gzprintf() assuring that the output will not exceed 8K. On the other + hand, if zlib is compiled to use snprintf() or vsnprintf(), which should + normally be the case, then there is no vulnerability. The ./configure + script will display warnings if an insecure variation of sprintf() will be + used by gzprintf(). Also the zlibCompileFlags() function will return + information on what variant of sprintf() is used by gzprintf(). If you don't have snprintf() or vsnprintf() and would like one, you can - find a portable implementation here: + find a good portable implementation in stb_sprintf.h here: - http://www.ijs.si/software/snprintf/ + https://github.com/nothings/stb Note that you should be using the most recent version of zlib. 
Versions 1.1.3 and before were subject to a double-free vulnerability, and versions @@ -283,7 +287,7 @@ The latest zlib FAQ is at http://zlib.net/zlib_faq.html Probably what you want is to use zlib in Java. zlib is already included as part of the Java SDK in the java.util.zip package. If you really want a version of zlib written in the Java language, look on the zlib home - page for links: http://zlib.net/ . + page for links: https://zlib.net/ . 35. I get this or that compiler or source-code scanner warning when I crank it up to maximally-pedantic. Can't you guys write proper code? @@ -314,9 +318,9 @@ The latest zlib FAQ is at http://zlib.net/zlib_faq.html zlib doesn't support encryption. The original PKZIP encryption is very weak and can be broken with freely available programs. To get strong - encryption, use GnuPG, http://www.gnupg.org/ , which already includes zlib + encryption, use GnuPG, https://www.gnupg.org/ , which already includes zlib compression. For PKZIP compatible "encryption", look at - http://www.info-zip.org/ + https://infozip.sourceforge.net/ 39. What's the difference between the "gzip" and "deflate" HTTP 1.1 encodings? diff --git a/sys/contrib/zlib/LICENSE b/sys/contrib/zlib/LICENSE index ab8ee6f71428..b7a69d058e61 100644 --- a/sys/contrib/zlib/LICENSE +++ b/sys/contrib/zlib/LICENSE @@ -1,6 +1,6 @@ Copyright notice: - (C) 1995-2022 Jean-loup Gailly and Mark Adler + (C) 1995-2026 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages diff --git a/sys/contrib/zlib/README b/sys/contrib/zlib/README index c5f917540b6f..2b1e6f36fe3e 100644 --- a/sys/contrib/zlib/README +++ b/sys/contrib/zlib/README @@ -1,10 +1,10 @@ ZLIB DATA COMPRESSION LIBRARY -zlib 1.3.1 is a general purpose data compression library. All the code is -thread safe. 
The data format used by the zlib library is described by RFCs -(Request for Comments) 1950 to 1952 in the files -http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and -rfc1952 (gzip format). +zlib 1.3.2 is a general purpose data compression library. All the code is +thread safe (though see the FAQ for caveats). The data format used by the zlib +library is described by RFCs (Request for Comments) 1950 to 1952 at +https://datatracker.ietf.org/doc/html/rfc1950 (zlib format), rfc1951 (deflate +format) and rfc1952 (gzip format). All functions of the compression library are documented in the file zlib.h (volunteer to write man pages welcome, contact zlib@gzip.org). A usage example @@ -21,17 +21,17 @@ make_vms.com. Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant <info@winimage.com> for the Windows DLL version. The zlib home page is -http://zlib.net/ . Before reporting a problem, please check this site to +https://zlib.net/ . Before reporting a problem, please check this site to verify that you have the latest version of zlib; otherwise get the latest version and check whether the problem still exists or not. -PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help. +PLEASE read the zlib FAQ https://zlib.net/zlib_faq.html before asking for help. Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan. 1997 issue of Dr. Dobb's Journal; a copy of the article is available at -https://marknelson.us/posts/1997/01/01/zlib-engine.html . +https://zlib.net/nelson/ . -The changes made in version 1.3.1 are documented in the file ChangeLog. +The changes made in version 1.3.2 are documented in the file ChangeLog. Unsupported third party contributions are provided in directory contrib/ . @@ -43,9 +43,9 @@ can be found at https://github.com/pmqs/IO-Compress . A Python interface to zlib written by A.M. 
Kuchling <amk@amk.ca> is available in Python 1.5 and later versions, see -http://docs.python.org/library/zlib.html . +https://docs.python.org/3/library/zlib.html . -zlib is built into tcl: http://wiki.tcl.tk/4610 . +zlib is built into tcl: https://wiki.tcl-lang.org/page/zlib . An experimental package to read and write files in .zip format, written on top of zlib by Gilles Vollant <info@winimage.com>, is available in the @@ -69,9 +69,7 @@ Notes for some targets: - zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with other compilers. Use "make test" to check your compiler. -- gzdopen is not supported on RISCOS or BEOS. - -- For PalmOs, see http://palmzlib.sourceforge.net/ +- For PalmOs, see https://palmzlib.sourceforge.net/ Acknowledgments: @@ -83,7 +81,7 @@ Acknowledgments: Copyright notice: - (C) 1995-2024 Jean-loup Gailly and Mark Adler + (C) 1995-2026 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages diff --git a/sys/contrib/zlib/compress.c b/sys/contrib/zlib/compress.c index f43bacf7ab97..bd74b9488eb8 100644 --- a/sys/contrib/zlib/compress.c +++ b/sys/contrib/zlib/compress.c @@ -1,5 +1,5 @@ /* compress.c -- compress a memory buffer - * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler + * Copyright (C) 1995-2026 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -18,13 +18,19 @@ compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if there was not enough room in the output buffer, Z_STREAM_ERROR if the level parameter is invalid. + + The _z versions of the functions take size_t length arguments. 
*/ -int ZEXPORT compress2(Bytef *dest, uLongf *destLen, const Bytef *source, - uLong sourceLen, int level) { +int ZEXPORT compress2_z(Bytef *dest, z_size_t *destLen, const Bytef *source, + z_size_t sourceLen, int level) { z_stream stream; int err; const uInt max = (uInt)-1; - uLong left; + z_size_t left; + + if ((sourceLen > 0 && source == NULL) || + destLen == NULL || (*destLen > 0 && dest == NULL)) + return Z_STREAM_ERROR; left = *destLen; *destLen = 0; @@ -43,23 +49,36 @@ int ZEXPORT compress2(Bytef *dest, uLongf *destLen, const Bytef *source, do { if (stream.avail_out == 0) { - stream.avail_out = left > (uLong)max ? max : (uInt)left; + stream.avail_out = left > (z_size_t)max ? max : (uInt)left; left -= stream.avail_out; } if (stream.avail_in == 0) { - stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen; + stream.avail_in = sourceLen > (z_size_t)max ? max : + (uInt)sourceLen; sourceLen -= stream.avail_in; } err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); } while (err == Z_OK); - *destLen = stream.total_out; + *destLen = (z_size_t)(stream.next_out - dest); deflateEnd(&stream); return err == Z_STREAM_END ? 
Z_OK : err; } - +int ZEXPORT compress2(Bytef *dest, uLongf *destLen, const Bytef *source, + uLong sourceLen, int level) { + int ret; + z_size_t got = *destLen; + ret = compress2_z(dest, &got, source, sourceLen, level); + *destLen = (uLong)got; + return ret; +} /* =========================================================================== */ +int ZEXPORT compress_z(Bytef *dest, z_size_t *destLen, const Bytef *source, + z_size_t sourceLen) { + return compress2_z(dest, destLen, source, sourceLen, + Z_DEFAULT_COMPRESSION); +} int ZEXPORT compress(Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen) { return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); @@ -69,7 +88,12 @@ int ZEXPORT compress(Bytef *dest, uLongf *destLen, const Bytef *source, If the default memLevel or windowBits for deflateInit() is changed, then this function needs to be updated. */ +z_size_t ZEXPORT compressBound_z(z_size_t sourceLen) { + z_size_t bound = sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13; + return bound < sourceLen ? (z_size_t)-1 : bound; +} uLong ZEXPORT compressBound(uLong sourceLen) { - return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + - (sourceLen >> 25) + 13; + z_size_t bound = compressBound_z(sourceLen); + return (uLong)bound != bound ? (uLong)-1 : (uLong)bound; } diff --git a/sys/contrib/zlib/contrib/README.contrib b/sys/contrib/zlib/contrib/README.contrib deleted file mode 100644 index 5e5f95054090..000000000000 --- a/sys/contrib/zlib/contrib/README.contrib +++ /dev/null @@ -1,57 +0,0 @@ -All files under this contrib directory are UNSUPPORTED. They were -provided by users of zlib and were not tested by the authors of zlib. -Use at your own risk. Please contact the authors of the contributions -for help about these, not the zlib authors. Thanks. 
- - -ada/ by Dmitriy Anisimkov <anisimkov@yahoo.com> - Support for Ada - See http://zlib-ada.sourceforge.net/ - -blast/ by Mark Adler <madler@alumni.caltech.edu> - Decompressor for output of PKWare Data Compression Library (DCL) - -delphi/ by Cosmin Truta <cosmint@cs.ubbcluj.ro> - Support for Delphi and C++ Builder - -dotzlib/ by Henrik Ravn <henrik@ravn.com> - Support for Microsoft .Net and Visual C++ .Net - -gcc_gvmat64/by Gilles Vollant <info@winimage.com> - GCC Version of x86 64-bit (AMD64 and Intel EM64t) code for x64 - assembler to replace longest_match() and inflate_fast() - -infback9/ by Mark Adler <madler@alumni.caltech.edu> - Unsupported diffs to infback to decode the deflate64 format - -iostream/ by Kevin Ruland <kevin@rodin.wustl.edu> - A C++ I/O streams interface to the zlib gz* functions - -iostream2/ by Tyge Løvset <Tyge.Lovset@cmr.no> - Another C++ I/O streams interface - -iostream3/ by Ludwig Schwardt <schwardt@sun.ac.za> - and Kevin Ruland <kevin@rodin.wustl.edu> - Yet another C++ I/O streams interface - -minizip/ by Gilles Vollant <info@winimage.com> - Mini zip and unzip based on zlib - Includes Zip64 support by Mathias Svensson <mathias@result42.com> - See http://www.winimage.com/zLibDll/minizip.html - -pascal/ by Bob Dellaca <bobdl@xtra.co.nz> et al. - Support for Pascal - -puff/ by Mark Adler <madler@alumni.caltech.edu> - Small, low memory usage inflate. Also serves to provide an - unambiguous description of the deflate format. - -testzlib/ by Gilles Vollant <info@winimage.com> - Example of the use of zlib - -untgz/ by Pedro A. 
Aranda Gutierrez <paag@tid.es> - A very simple tar.gz file extractor using zlib - -vstudio/ by Gilles Vollant <info@winimage.com> - Building a minizip-enhanced zlib with Microsoft Visual Studio - Includes vc11 from kreuzerkrieg and vc12 from davispuh diff --git a/sys/contrib/zlib/contrib/gcc_gvmat64/gvmat64.S b/sys/contrib/zlib/contrib/gcc_gvmat64/gvmat64.S deleted file mode 100644 index dd858ddbd16b..000000000000 --- a/sys/contrib/zlib/contrib/gcc_gvmat64/gvmat64.S +++ /dev/null @@ -1,574 +0,0 @@ -/* -;uInt longest_match_x64( -; deflate_state *s, -; IPos cur_match); // current match - -; gvmat64.S -- Asm portion of the optimized longest_match for 32 bits x86_64 -; (AMD64 on Athlon 64, Opteron, Phenom -; and Intel EM64T on Pentium 4 with EM64T, Pentium D, Core 2 Duo, Core I5/I7) -; this file is translation from gvmat64.asm to GCC 4.x (for Linux, Mac XCode) -; Copyright (C) 1995-2010 Jean-loup Gailly, Brian Raiter and Gilles Vollant. -; -; File written by Gilles Vollant, by converting to assembly the longest_match -; from Jean-loup Gailly in deflate.c of zLib and infoZip zip. -; and by taking inspiration on asm686 with masm, optimised assembly code -; from Brian Raiter, written 1998 -; -; This software is provided 'as-is', without any express or implied -; warranty. In no event will the authors be held liable for any damages -; arising from the use of this software. -; -; Permission is granted to anyone to use this software for any purpose, -; including commercial applications, and to alter it and redistribute it -; freely, subject to the following restrictions: -; -; 1. The origin of this software must not be misrepresented; you must not -; claim that you wrote the original software. If you use this software -; in a product, an acknowledgment in the product documentation would be -; appreciated but is not required. -; 2. Altered source versions must be plainly marked as such, and must not be -; misrepresented as being the original software -; 3. 
This notice may not be removed or altered from any source distribution. -; -; http://www.zlib.net -; http://www.winimage.com/zLibDll -; http://www.muppetlabs.com/~breadbox/software/assembly.html -; -; to compile this file for zLib, I use option: -; gcc -c -arch x86_64 gvmat64.S - - -;uInt longest_match(s, cur_match) -; deflate_state *s; -; IPos cur_match; // current match / -; -; with XCode for Mac, I had strange error with some jump on intel syntax -; this is why BEFORE_JMP and AFTER_JMP are used - */ - - -#define BEFORE_JMP .att_syntax -#define AFTER_JMP .intel_syntax noprefix - -#ifndef NO_UNDERLINE -# define match_init _match_init -# define longest_match _longest_match -#endif - -.intel_syntax noprefix - -.globl match_init, longest_match -.text -longest_match: - - - -#define LocalVarsSize 96 -/* -; register used : rax,rbx,rcx,rdx,rsi,rdi,r8,r9,r10,r11,r12 -; free register : r14,r15 -; register can be saved : rsp -*/ - -#define chainlenwmask (rsp + 8 - LocalVarsSize) -#define nicematch (rsp + 16 - LocalVarsSize) - -#define save_rdi (rsp + 24 - LocalVarsSize) -#define save_rsi (rsp + 32 - LocalVarsSize) -#define save_rbx (rsp + 40 - LocalVarsSize) -#define save_rbp (rsp + 48 - LocalVarsSize) -#define save_r12 (rsp + 56 - LocalVarsSize) -#define save_r13 (rsp + 64 - LocalVarsSize) -#define save_r14 (rsp + 72 - LocalVarsSize) -#define save_r15 (rsp + 80 - LocalVarsSize) - - -/* -; all the +4 offsets are due to the addition of pending_buf_size (in zlib -; in the deflate_state structure since the asm code was first written -; (if you compile with zlib 1.0.4 or older, remove the +4). -; Note : these value are good with a 8 bytes boundary pack structure -*/ - -#define MAX_MATCH 258 -#define MIN_MATCH 3 -#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) - -/* -;;; Offsets for fields in the deflate_state structure. These numbers -;;; are calculated from the definition of deflate_state, with the -;;; assumption that the compiler will dword-align the fields. 
(Thus, -;;; changing the definition of deflate_state could easily cause this -;;; program to crash horribly, without so much as a warning at -;;; compile time. Sigh.) - -; all the +zlib1222add offsets are due to the addition of fields -; in zlib in the deflate_state structure since the asm code was first written -; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). -; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). -; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). -*/ - - - -/* you can check the structure offset by running - -#include <stdlib.h> -#include <stdio.h> -#include "deflate.h" - -void print_depl() -{ -deflate_state ds; -deflate_state *s=&ds; -printf("size pointer=%u\n",(int)sizeof(void*)); - -printf("#define dsWSize %u\n",(int)(((char*)&(s->w_size))-((char*)s))); -printf("#define dsWMask %u\n",(int)(((char*)&(s->w_mask))-((char*)s))); -printf("#define dsWindow %u\n",(int)(((char*)&(s->window))-((char*)s))); -printf("#define dsPrev %u\n",(int)(((char*)&(s->prev))-((char*)s))); -printf("#define dsMatchLen %u\n",(int)(((char*)&(s->match_length))-((char*)s))); -printf("#define dsPrevMatch %u\n",(int)(((char*)&(s->prev_match))-((char*)s))); -printf("#define dsStrStart %u\n",(int)(((char*)&(s->strstart))-((char*)s))); -printf("#define dsMatchStart %u\n",(int)(((char*)&(s->match_start))-((char*)s))); -printf("#define dsLookahead %u\n",(int)(((char*)&(s->lookahead))-((char*)s))); -printf("#define dsPrevLen %u\n",(int)(((char*)&(s->prev_length))-((char*)s))); -printf("#define dsMaxChainLen %u\n",(int)(((char*)&(s->max_chain_length))-((char*)s))); -printf("#define dsGoodMatch %u\n",(int)(((char*)&(s->good_match))-((char*)s))); -printf("#define dsNiceMatch %u\n",(int)(((char*)&(s->nice_match))-((char*)s))); -} -*/ - -#define dsWSize 68 -#define dsWMask 76 -#define dsWindow 80 -#define dsPrev 96 -#define dsMatchLen 144 -#define dsPrevMatch 148 -#define dsStrStart 156 -#define dsMatchStart 160 
-#define dsLookahead 164 -#define dsPrevLen 168 -#define dsMaxChainLen 172 -#define dsGoodMatch 188 -#define dsNiceMatch 192 - -#define window_size [ rcx + dsWSize] -#define WMask [ rcx + dsWMask] -#define window_ad [ rcx + dsWindow] -#define prev_ad [ rcx + dsPrev] -#define strstart [ rcx + dsStrStart] -#define match_start [ rcx + dsMatchStart] -#define Lookahead [ rcx + dsLookahead] //; 0ffffffffh on infozip -#define prev_length [ rcx + dsPrevLen] -#define max_chain_length [ rcx + dsMaxChainLen] -#define good_match [ rcx + dsGoodMatch] -#define nice_match [ rcx + dsNiceMatch] - -/* -; windows: -; parameter 1 in rcx(deflate state s), param 2 in rdx (cur match) - -; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and -; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp -; -; All registers must be preserved across the call, except for -; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch. - -; -; gcc on macosx-linux: -; see http://www.x86-64.org/documentation/abi-0.99.pdf -; param 1 in rdi, param 2 in rsi -; rbx, rsp, rbp, r12 to r15 must be preserved - -;;; Save registers that the compiler may be using, and adjust esp to -;;; make room for our stack frame. - - -;;; Retrieve the function arguments. r8d will hold cur_match -;;; throughout the entire function. edx will hold the pointer to the -;;; deflate_state structure during the function's setup (before -;;; entering the main loop. 
- -; ms: parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match) -; mac: param 1 in rdi, param 2 rsi -; this clear high 32 bits of r8, which can be garbage in both r8 and rdx -*/ - mov [save_rbx],rbx - mov [save_rbp],rbp - - - mov rcx,rdi - - mov r8d,esi - - - mov [save_r12],r12 - mov [save_r13],r13 - mov [save_r14],r14 - mov [save_r15],r15 - - -//;;; uInt wmask = s->w_mask; -//;;; unsigned chain_length = s->max_chain_length; -//;;; if (s->prev_length >= s->good_match) { -//;;; chain_length >>= 2; -//;;; } - - - mov edi, prev_length - mov esi, good_match - mov eax, WMask - mov ebx, max_chain_length - cmp edi, esi - jl LastMatchGood - shr ebx, 2 -LastMatchGood: - -//;;; chainlen is decremented once beforehand so that the function can -//;;; use the sign flag instead of the zero flag for the exit test. -//;;; It is then shifted into the high word, to make room for the wmask -//;;; value, which it will always accompany. - - dec ebx - shl ebx, 16 - or ebx, eax - -//;;; on zlib only -//;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - - - - mov eax, nice_match - mov [chainlenwmask], ebx - mov r10d, Lookahead - cmp r10d, eax - cmovnl r10d, eax - mov [nicematch],r10d - - - -//;;; register Bytef *scan = s->window + s->strstart; - mov r10, window_ad - mov ebp, strstart - lea r13, [r10 + rbp] - -//;;; Determine how many bytes the scan ptr is off from being -//;;; dword-aligned. - - mov r9,r13 - neg r13 - and r13,3 - -//;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? -//;;; s->strstart - (IPos)MAX_DIST(s) : NIL; - - - mov eax, window_size - sub eax, MIN_LOOKAHEAD - - - xor edi,edi - sub ebp, eax - - mov r11d, prev_length - - cmovng ebp,edi - -//;;; int best_len = s->prev_length; - - -//;;; Store the sum of s->window + best_len in esi locally, and in esi. 
- - lea rsi,[r10+r11] - -//;;; register ush scan_start = *(ushf*)scan; -//;;; register ush scan_end = *(ushf*)(scan+best_len-1); -//;;; Posf *prev = s->prev; - - movzx r12d,word ptr [r9] - movzx ebx, word ptr [r9 + r11 - 1] - - mov rdi, prev_ad - -//;;; Jump into the main loop. - - mov edx, [chainlenwmask] - - cmp bx,word ptr [rsi + r8 - 1] - jz LookupLoopIsZero - - - -LookupLoop1: - and r8d, edx - - movzx r8d, word ptr [rdi + r8*2] - cmp r8d, ebp - jbe LeaveNow - - - - sub edx, 0x00010000 - BEFORE_JMP - js LeaveNow - AFTER_JMP - -LoopEntry1: - cmp bx,word ptr [rsi + r8 - 1] - BEFORE_JMP - jz LookupLoopIsZero - AFTER_JMP - -LookupLoop2: - and r8d, edx - - movzx r8d, word ptr [rdi + r8*2] - cmp r8d, ebp - BEFORE_JMP - jbe LeaveNow - AFTER_JMP - sub edx, 0x00010000 - BEFORE_JMP - js LeaveNow - AFTER_JMP - -LoopEntry2: - cmp bx,word ptr [rsi + r8 - 1] - BEFORE_JMP - jz LookupLoopIsZero - AFTER_JMP - -LookupLoop4: - and r8d, edx - - movzx r8d, word ptr [rdi + r8*2] - cmp r8d, ebp - BEFORE_JMP - jbe LeaveNow - AFTER_JMP - sub edx, 0x00010000 - BEFORE_JMP - js LeaveNow - AFTER_JMP - -LoopEntry4: - - cmp bx,word ptr [rsi + r8 - 1] - BEFORE_JMP - jnz LookupLoop1 - jmp LookupLoopIsZero - AFTER_JMP -/* -;;; do { -;;; match = s->window + cur_match; -;;; if (*(ushf*)(match+best_len-1) != scan_end || -;;; *(ushf*)match != scan_start) continue; -;;; [...] -;;; } while ((cur_match = prev[cur_match & wmask]) > limit -;;; && --chain_length != 0); -;;; -;;; Here is the inner loop of the function. The function will spend the -;;; majority of its time in this loop, and majority of that time will -;;; be spent in the first ten instructions. 
-;;; -;;; Within this loop: -;;; ebx = scanend -;;; r8d = curmatch -;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) -;;; esi = windowbestlen - i.e., (window + bestlen) -;;; edi = prev -;;; ebp = limit -*/ -.balign 16 -LookupLoop: - and r8d, edx - - movzx r8d, word ptr [rdi + r8*2] - cmp r8d, ebp - BEFORE_JMP - jbe LeaveNow - AFTER_JMP - sub edx, 0x00010000 - BEFORE_JMP - js LeaveNow - AFTER_JMP - -LoopEntry: - - cmp bx,word ptr [rsi + r8 - 1] - BEFORE_JMP - jnz LookupLoop1 - AFTER_JMP -LookupLoopIsZero: - cmp r12w, word ptr [r10 + r8] - BEFORE_JMP - jnz LookupLoop1 - AFTER_JMP - - -//;;; Store the current value of chainlen. - mov [chainlenwmask], edx -/* -;;; Point edi to the string under scrutiny, and esi to the string we -;;; are hoping to match it up with. In actuality, esi and edi are -;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is -;;; initialized to -(MAX_MATCH_8 - scanalign). -*/ - lea rsi,[r8+r10] - mov rdx, 0xfffffffffffffef8 //; -(MAX_MATCH_8) - lea rsi, [rsi + r13 + 0x0108] //;MAX_MATCH_8] - lea rdi, [r9 + r13 + 0x0108] //;MAX_MATCH_8] - - prefetcht1 [rsi+rdx] - prefetcht1 [rdi+rdx] - -/* -;;; Test the strings for equality, 8 bytes at a time. At the end, -;;; adjust rdx so that it is offset to the exact byte that mismatched. -;;; -;;; We already know at this point that the first three bytes of the -;;; strings match each other, and they can be safely passed over before -;;; starting the compare loop. So what this code does is skip over 0-3 -;;; bytes, as much as necessary in order to dword-align the edi -;;; pointer. (rsi will still be misaligned three times out of four.) -;;; -;;; It should be confessed that this loop usually does not represent -;;; much of the total running time. Replacing it with a more -;;; straightforward "rep cmpsb" would not drastically degrade -;;; performance. 
-*/ - -LoopCmps: - mov rax, [rsi + rdx] - xor rax, [rdi + rdx] - jnz LeaveLoopCmps - - mov rax, [rsi + rdx + 8] - xor rax, [rdi + rdx + 8] - jnz LeaveLoopCmps8 - - - mov rax, [rsi + rdx + 8+8] - xor rax, [rdi + rdx + 8+8] - jnz LeaveLoopCmps16 - - add rdx,8+8+8 - - BEFORE_JMP - jnz LoopCmps - jmp LenMaximum - AFTER_JMP - -LeaveLoopCmps16: add rdx,8 -LeaveLoopCmps8: add rdx,8 -LeaveLoopCmps: - - test eax, 0x0000FFFF
*** 5225 LINES SKIPPED ***
home | help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69c6221c.43121.3633e3f0>
