Date: Sun, 22 Nov 2009 21:46:16 GMT From: Yevgen Drachenko <geka@sippysoft.com> To: freebsd-gnats-submit@FreeBSD.org Subject: misc/140794: Add support of Unicode for BIFF8 files. Message-ID: <200911222146.nAMLkGd6069755@www.freebsd.org> Resent-Message-ID: <200911222150.nAMLo0jB042319@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 140794
>Category: misc
>Synopsis: Add support of Unicode for BIFF8 files.
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Sun Nov 22 21:50:00 UTC 2009
>Closed-Date:
>Last-Modified:
>Originator: Yevgen Drachenko
>Release: FreeBSD 6.3-PRERELEASE
>Organization:
Sippy Software, Inc.
>Environment:
FreeBSD ssp-geka.sippysoft.com 6.3-PRERELEASE FreeBSD 6.3-PRERELEASE #1: Thu Nov 22 02:23:12 UTC 2007 root@pioneer.sippysoft.com:/usr/obj/i386/usr/src/sys/SSP-PRODUCTION i386
>Description:
Calling $workbook->setVersion(8) together with $worksheet->setInputEncoding('UTF-8') produces a spreadsheet file that crashes Excel.
>How-To-Repeat:
>Fix:
Patch attached with submission follows:
diff -Nur /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/Makefile pear-Spreadsheet_Excel_Writer.new/Makefile
--- /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/Makefile 2007-04-14 04:49:04.000000000 +0300
+++ pear-Spreadsheet_Excel_Writer.new/Makefile 2009-11-22 23:15:51.000000000 +0200
@@ -7,6 +7,7 @@
PORTNAME= Spreadsheet_Excel_Writer
PORTVERSION= 0.9.1
+PORTREVISION= 1
CATEGORIES= textproc www pear
MAINTAINER= pav@FreeBSD.org
diff -Nur /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/files/patch-biff8_utf8 pear-Spreadsheet_Excel_Writer.new/files/patch-biff8_utf8
--- /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/files/patch-biff8_utf8 1970-01-01 03:00:00.000000000 +0300
+++ pear-Spreadsheet_Excel_Writer.new/files/patch-biff8_utf8 2009-11-22 23:29:18.000000000 +0200
@@ -0,0 +1,199 @@
+--- Writer/Workbook.php.orig 2005-11-08 05:32:52.000000000 +0200
++++ Writer/Workbook.php 2009-11-22 23:14:33.000000000 +0200
+@@ -1311,9 +1311,10 @@
+ 8228 : Maximum Excel97 block size
+ -4 : Length of block header
+ -8 : Length of additional SST header information
+- = 8216
++ -8 : Arbitrary number to keep within _add_continue() limit
++ = 8208
+ */
+- $continue_limit = 8216;
++ $continue_limit = 8208;
+ $block_length = 0;
+ $written = 0;
+ $this->_block_sizes = array();
+@@ -1321,6 +1322,9 @@
+
+ foreach (array_keys($this->_str_table) as $string) {
+ $string_length = strlen($string);
++ $headerinfo = unpack("vlength/Cencoding", $string);
++ $encoding = $headerinfo["encoding"];
++ $split_string = 0;
+
+ // Block length is the total length of the strings that will be
+ // written out in a single SST or CONTINUE block.
+@@ -1347,16 +1351,39 @@
+ boundaries. Therefore, in some cases we need to reduce the
+ amount of available
+ */
++ $align = 0;
++
++ # Only applies to Unicode strings
++ if ($encoding == 1) {
++ # Min string + header size -1
++ $header_length = 4;
++
++ if ($space_remaining > $header_length) {
++ # String contains 3 byte header => split on odd boundary
++ if (!$split_string && $space_remaining % 2 != 1) {
++ $space_remaining--;
++ $align = 1;
++ }
++ # Split section without header => split on even boundary
++ else if ($split_string && $space_remaining % 2 == 1) {
++ $space_remaining--;
++ $align = 1;
++ }
++
++ $split_string = 1;
++ }
++ }
++
+
+ if ($space_remaining > $header_length) {
+ // Write as much as possible of the string in the current block
+ $written += $space_remaining;
+
+ // Reduce the current block length by the amount written
+- $block_length -= $continue_limit - $continue;
++ $block_length -= $continue_limit - $continue - $align;
+
+ // Store the max size for this block
+- $this->_block_sizes[] = $continue_limit;
++ $this->_block_sizes[] = $continue_limit - $align;
+
+ // If the current string was split then the next CONTINUE block
+ // should have the string continue flag (grbit) set unless the
+@@ -1398,13 +1425,19 @@
+ This length is required to set the offsets in the BOUNDSHEET records since
+ they must be written before the SST records
+ */
+- $total_offset = array_sum($this->_block_sizes);
+- // SST information
+- $total_offset += 8;
+- if (!empty($this->_block_sizes)) {
+- $total_offset += (count($this->_block_sizes)) * 4; // add CONTINUE headers
+- }
+- return $total_offset;
++
++ $tmp_block_sizes = array();
++ $tmp_block_sizes = $this->_block_sizes;
++
++ $length = 12;
++ if (!empty($tmp_block_sizes)) {
++ $length += array_shift($tmp_block_sizes); # SST
++ }
++ while (!empty($tmp_block_sizes)) {
++ $length += 4 + array_shift($tmp_block_sizes); # CONTINUEs
++ }
++
++ return $length;
+ }
+
+ /**
+@@ -1421,9 +1454,31 @@
+ function _storeSharedStringsTable()
+ {
+ $record = 0x00fc; // Record identifier
++ $length = 0x0008; // Number of bytes to follow
++ $total = 0x0000;
++
++ // Iterate through the strings to calculate the CONTINUE block sizes
++ $continue_limit = 8208;
++ $block_length = 0;
++ $written = 0;
++ $continue = 0;
++
+ // sizes are upside down
+- $this->_block_sizes = array_reverse($this->_block_sizes);
+- $length = array_pop($this->_block_sizes) + 8; // First block size plus SST information
++ $tmp_block_sizes = $this->_block_sizes;
++// $tmp_block_sizes = array_reverse($this->_block_sizes);
++
++ # The SST record is required even if it contains no strings. Thus we will
++ # always have a length
++ #
++ if (!empty($tmp_block_sizes)) {
++ $length = 8 + array_shift($tmp_block_sizes);
++ }
++ else {
++ # No strings
++ $length = 8;
++ }
++
++
+
+ // Write the SST block header information
+ $header = pack("vv", $record, $length);
+@@ -1431,18 +1486,14 @@
+ $this->_append($header . $data);
+
+
+- // Iterate through the strings to calculate the CONTINUE block sizes
+- $continue_limit = 8216;
+- $block_length = 0;
+- $written = 0;
+- $continue = 0;
+
+
+ /* TODO: not good for performance */
+ foreach (array_keys($this->_str_table) as $string) {
+
+ $string_length = strlen($string);
+- $encoding = 0; // assume there are no Unicode strings
++ $headerinfo = unpack("vlength/Cencoding", $string);
++ $encoding = $headerinfo["encoding"];
+ $split_string = 0;
+
+ // Block length is the total length of the strings that will be
+@@ -1473,6 +1524,30 @@
+
+ // Unicode data should only be split on char (2 byte) boundaries.
+ // Therefore, in some cases we need to reduce the amount of available
++ // space by 1 byte to ensure the correct alignment.
++ $align = 0;
++
++ // Only applies to Unicode strings
++ if ($encoding == 1) {
++ // Min string + header size -1
++ $header_length = 4;
++
++ if ($space_remaining > $header_length) {
++ // String contains 3 byte header => split on odd boundary
++ if (!$split_string && $space_remaining % 2 != 1) {
++ $space_remaining--;
++ $align = 1;
++ }
++ // Split section without header => split on even boundary
++ else if ($split_string && $space_remaining % 2 == 1) {
++ $space_remaining--;
++ $align = 1;
++ }
++
++ $split_string = 1;
++ }
++ }
++
+
+ if ($space_remaining > $header_length) {
+ // Write as much as possible of the string in the current block
+@@ -1483,7 +1558,7 @@
+ $string = substr($string, $space_remaining);
+
+ // Reduce the current block length by the amount written
+- $block_length -= $continue_limit - $continue;
++ $block_length -= $continue_limit - $continue - $align;
+
+ // If the current string was split then the next CONTINUE block
+ // should have the string continue flag (grbit) set unless the
+@@ -1503,7 +1578,8 @@
+ // Write the CONTINUE block header
+ if (!empty($this->_block_sizes)) {
+ $record = 0x003C;
+- $length = array_pop($this->_block_sizes);
++ $length = array_shift($tmp_block_sizes);
++
+ $header = pack('vv', $record, $length);
+ if ($continue) {
+ $header .= pack('C', $encoding);
>Release-Note:
>Audit-Trail:
>Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200911222146.nAMLkGd6069755>
