Date: Sun, 22 Nov 2009 21:46:16 GMT
From: Yevgen Drachenko <geka@sippysoft.com>
To: freebsd-gnats-submit@FreeBSD.org
Subject: misc/140794: Add support of Unicode for BIFF8 files.
Message-ID: <200911222146.nAMLkGd6069755@www.freebsd.org>
Resent-Message-ID: <200911222150.nAMLo0jB042319@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 140794 >Category: misc >Synopsis: Add support of Unicode for BIFF8 files. >Confidential: no >Severity: serious >Priority: medium >Responsible: freebsd-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: sw-bug >Submitter-Id: current-users >Arrival-Date: Sun Nov 22 21:50:00 UTC 2009 >Closed-Date: >Last-Modified: >Originator: Yevgen Drachenko >Release: FreeBSD 6.3-PRERELEASE >Organization: Sippy Software, Inc. >Environment: FreeBSD ssp-geka.sippysoft.com 6.3-PRERELEASE FreeBSD 6.3-PRERELEASE #1: Thu Nov 22 02:23:12 UTC 2007 root@pioneer.sippysoft.com:/usr/obj/i386/usr/src/sys/SSP-PRODUCTION i386 >Description: $workbook->setVersion(8) together with $worksheet->setInputEncoding('UTF-8') crashes excel. >How-To-Repeat: >Fix: Patch attached with submission follows: diff -Nur /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/Makefile pear-Spreadsheet_Excel_Writer.new/Makefile --- /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/Makefile 2007-04-14 04:49:04.000000000 +0300 +++ pear-Spreadsheet_Excel_Writer.new/Makefile 2009-11-22 23:15:51.000000000 +0200 @@ -7,6 +7,7 @@ PORTNAME= Spreadsheet_Excel_Writer PORTVERSION= 0.9.1 +PORTREVISION= 1 CATEGORIES= textproc www pear MAINTAINER= pav@FreeBSD.org diff -Nur /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/files/patch-biff8_utf8 pear-Spreadsheet_Excel_Writer.new/files/patch-biff8_utf8 --- /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/files/patch-biff8_utf8 1970-01-01 03:00:00.000000000 +0300 +++ pear-Spreadsheet_Excel_Writer.new/files/patch-biff8_utf8 2009-11-22 23:29:18.000000000 +0200 @@ -0,0 +1,199 @@ +--- Writer/Workbook.php.orig 2005-11-08 05:32:52.000000000 +0200 ++++ Writer/Workbook.php 2009-11-22 23:14:33.000000000 +0200 +@@ -1311,9 +1311,10 @@ + 8228 : Maximum Excel97 block size + -4 : Length of block header + -8 : Length of additional SST header information +- = 8216 ++ -8 : Arbitrary number to keep within _add_continue() limit ++ = 8208 + */ +- $continue_limit = 8216; ++ $continue_limit = 
8208; + $block_length = 0; + $written = 0; + $this->_block_sizes = array(); +@@ -1321,6 +1322,9 @@ + + foreach (array_keys($this->_str_table) as $string) { + $string_length = strlen($string); ++ $headerinfo = unpack("vlength/Cencoding", $string); ++ $encoding = $headerinfo["encoding"]; ++ $split_string = 0; + + // Block length is the total length of the strings that will be + // written out in a single SST or CONTINUE block. +@@ -1347,16 +1351,39 @@ + boundaries. Therefore, in some cases we need to reduce the + amount of available + */ ++ $align = 0; ++ ++ # Only applies to Unicode strings ++ if ($encoding == 1) { ++ # Min string + header size -1 ++ $header_length = 4; ++ ++ if ($space_remaining > $header_length) { ++ # String contains 3 byte header => split on odd boundary ++ if (!$split_string && $space_remaining % 2 != 1) { ++ $space_remaining--; ++ $align = 1; ++ } ++ # Split section without header => split on even boundary ++ else if ($split_string && $space_remaining % 2 == 1) { ++ $space_remaining--; ++ $align = 1; ++ } ++ ++ $split_string = 1; ++ } ++ } ++ + + if ($space_remaining > $header_length) { + // Write as much as possible of the string in the current block + $written += $space_remaining; + + // Reduce the current block length by the amount written +- $block_length -= $continue_limit - $continue; ++ $block_length -= $continue_limit - $continue - $align; + + // Store the max size for this block +- $this->_block_sizes[] = $continue_limit; ++ $this->_block_sizes[] = $continue_limit - $align; + + // If the current string was split then the next CONTINUE block + // should have the string continue flag (grbit) set unless the +@@ -1398,13 +1425,19 @@ + This length is required to set the offsets in the BOUNDSHEET records since + they must be written before the SST records + */ +- $total_offset = array_sum($this->_block_sizes); +- // SST information +- $total_offset += 8; +- if (!empty($this->_block_sizes)) { +- $total_offset += (count($this->_block_sizes)) 
* 4; // add CONTINUE headers +- } +- return $total_offset; ++ ++ $tmp_block_sizes = array(); ++ $tmp_block_sizes = $this->_block_sizes; ++ ++ $length = 12; ++ if (!empty($tmp_block_sizes)) { ++ $length += array_shift($tmp_block_sizes); # SST ++ } ++ while (!empty($tmp_block_sizes)) { ++ $length += 4 + array_shift($tmp_block_sizes); # CONTINUEs ++ } ++ ++ return $length; + } + + /** +@@ -1421,9 +1454,31 @@ + function _storeSharedStringsTable() + { + $record = 0x00fc; // Record identifier ++ $length = 0x0008; // Number of bytes to follow ++ $total = 0x0000; ++ ++ // Iterate through the strings to calculate the CONTINUE block sizes ++ $continue_limit = 8208; ++ $block_length = 0; ++ $written = 0; ++ $continue = 0; ++ + // sizes are upside down +- $this->_block_sizes = array_reverse($this->_block_sizes); +- $length = array_pop($this->_block_sizes) + 8; // First block size plus SST information ++ $tmp_block_sizes = $this->_block_sizes; ++// $tmp_block_sizes = array_reverse($this->_block_sizes); ++ ++ # The SST record is required even if it contains no strings. Thus we will ++ # always have a length ++ # ++ if (!empty($tmp_block_sizes)) { ++ $length = 8 + array_shift($tmp_block_sizes); ++ } ++ else { ++ # No strings ++ $length = 8; ++ } ++ ++ + + // Write the SST block header information + $header = pack("vv", $record, $length); +@@ -1431,18 +1486,14 @@ + $this->_append($header . 
$data); + + +- // Iterate through the strings to calculate the CONTINUE block sizes +- $continue_limit = 8216; +- $block_length = 0; +- $written = 0; +- $continue = 0; + + + /* TODO: not good for performance */ + foreach (array_keys($this->_str_table) as $string) { + + $string_length = strlen($string); +- $encoding = 0; // assume there are no Unicode strings ++ $headerinfo = unpack("vlength/Cencoding", $string); ++ $encoding = $headerinfo["encoding"]; + $split_string = 0; + + // Block length is the total length of the strings that will be +@@ -1473,6 +1524,30 @@ + + // Unicode data should only be split on char (2 byte) boundaries. + // Therefore, in some cases we need to reduce the amount of available ++ // space by 1 byte to ensure the correct alignment. ++ $align = 0; ++ ++ // Only applies to Unicode strings ++ if ($encoding == 1) { ++ // Min string + header size -1 ++ $header_length = 4; ++ ++ if ($space_remaining > $header_length) { ++ // String contains 3 byte header => split on odd boundary ++ if (!$split_string && $space_remaining % 2 != 1) { ++ $space_remaining--; ++ $align = 1; ++ } ++ // Split section without header => split on even boundary ++ else if ($split_string && $space_remaining % 2 == 1) { ++ $space_remaining--; ++ $align = 1; ++ } ++ ++ $split_string = 1; ++ } ++ } ++ + + if ($space_remaining > $header_length) { + // Write as much as possible of the string in the current block +@@ -1483,7 +1558,7 @@ + $string = substr($string, $space_remaining); + + // Reduce the current block length by the amount written +- $block_length -= $continue_limit - $continue; ++ $block_length -= $continue_limit - $continue - $align; + + // If the current string was split then the next CONTINUE block + // should have the string continue flag (grbit) set unless the +@@ -1503,7 +1578,8 @@ + // Write the CONTINUE block header + if (!empty($this->_block_sizes)) { + $record = 0x003C; +- $length = array_pop($this->_block_sizes); ++ $length = array_shift($tmp_block_sizes); ++ 
+ $header = pack('vv', $record, $length); + if ($continue) { + $header .= pack('C', $encoding); >Release-Note: >Audit-Trail: >Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200911222146.nAMLkGd6069755>